Diffstat (limited to 'mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp')
-rw-r--r-- | mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp | 1205
1 file changed, 607 insertions, 598 deletions
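The diff below mechanically migrates this file from the `rewriter.create<OpTy>(loc, ...)` builder spelling to the static `OpTy::create(rewriter, loc, ...)` form (it also drops a direct `#include "mlir/IR/ImplicitLocOpBuilder.h"`). A minimal sketch of the before/after shape, assuming an MLIR tree that already provides the static `create(OpBuilder &, Location, ...)` overloads this patch targets; the helper functions below are illustrative only, not code from the patch:

```cpp
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

// Before this patch: the op type is a template argument on the builder.
static Value buildAddBefore(OpBuilder &rewriter, Location loc, Value lhs,
                            Value rhs) {
  return rewriter.create<arith::AddFOp>(loc, lhs, rhs);
}

// After this patch: the op's static `create` takes the builder and the
// location as its leading arguments; the operand list is unchanged.
static Value buildAddAfter(OpBuilder &rewriter, Location loc, Value lhs,
                           Value rhs) {
  return arith::AddFOp::create(rewriter, loc, lhs, rhs);
}
```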
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index 2f608bb..0e3de06 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -22,7 +22,6 @@ #include "mlir/Dialect/Tosa/Utils/ConversionUtils.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" -#include "mlir/IR/ImplicitLocOpBuilder.h" #include "mlir/IR/Matchers.h" #include "mlir/IR/OpDefinition.h" #include "mlir/IR/PatternMatch.h" @@ -70,14 +69,14 @@ materializeBinaryNanCheckIfRequired(OpTy op, PatternRewriter &rewriter, return result; // Unordered comparison of NaN against itself will always return true. - Value lhsIsNaN = rewriter.create<arith::CmpFOp>( - op.getLoc(), arith::CmpFPredicate::UNO, lhs, lhs); - Value rhsIsNaN = rewriter.create<arith::CmpFOp>( - op.getLoc(), arith::CmpFPredicate::UNO, rhs, rhs); + Value lhsIsNaN = arith::CmpFOp::create(rewriter, op.getLoc(), + arith::CmpFPredicate::UNO, lhs, lhs); + Value rhsIsNaN = arith::CmpFOp::create(rewriter, op.getLoc(), + arith::CmpFPredicate::UNO, rhs, rhs); Value rhsOrResult = - rewriter.create<arith::SelectOp>(op.getLoc(), lhsIsNaN, rhs, result); - return rewriter.create<arith::SelectOp>(op.getLoc(), rhsIsNaN, lhs, - rhsOrResult); + arith::SelectOp::create(rewriter, op.getLoc(), lhsIsNaN, rhs, result); + return arith::SelectOp::create(rewriter, op.getLoc(), rhsIsNaN, lhs, + rhsOrResult); } static Value createLinalgBodyCalculationForElementwiseOp( @@ -89,38 +88,38 @@ static Value createLinalgBodyCalculationForElementwiseOp( // tosa::AbsOp if (isa<tosa::AbsOp>(op) && isa<FloatType>(elementTy)) - return rewriter.create<math::AbsFOp>(loc, resultTypes, args); + return math::AbsFOp::create(rewriter, loc, resultTypes, args); if (isa<tosa::AbsOp>(op) && isa<IntegerType>(elementTy)) { - auto zero = rewriter.create<arith::ConstantOp>( - loc, rewriter.getZeroAttr(elementTy)); - auto neg = rewriter.create<arith::SubIOp>(loc, zero, args[0]); - return rewriter.create<arith::MaxSIOp>(loc, args[0], neg); + auto zero = arith::ConstantOp::create(rewriter, loc, + rewriter.getZeroAttr(elementTy)); + auto neg = arith::SubIOp::create(rewriter, loc, zero, args[0]); + return arith::MaxSIOp::create(rewriter, loc, args[0], neg); } // tosa::AddOp if (isa<tosa::AddOp>(op) && isa<FloatType>(elementTy)) - return rewriter.create<arith::AddFOp>(loc, resultTypes, args); + return arith::AddFOp::create(rewriter, loc, resultTypes, args); if (isa<tosa::AddOp>(op) && isa<IntegerType>(elementTy)) - return rewriter.create<arith::AddIOp>(loc, resultTypes, args); + return arith::AddIOp::create(rewriter, loc, resultTypes, args); // tosa::SubOp if (isa<tosa::SubOp>(op) && isa<FloatType>(elementTy)) - return rewriter.create<arith::SubFOp>(loc, resultTypes, args); + return arith::SubFOp::create(rewriter, loc, resultTypes, args); if (isa<tosa::SubOp>(op) && isa<IntegerType>(elementTy)) - return rewriter.create<arith::SubIOp>(loc, resultTypes, args); + return arith::SubIOp::create(rewriter, loc, resultTypes, args); // tosa::IntDivOp if (isa<tosa::IntDivOp>(op) && isa<IntegerType>(elementTy)) - return rewriter.create<arith::DivSIOp>(loc, resultTypes, args); + return arith::DivSIOp::create(rewriter, loc, resultTypes, args); // tosa::ReciprocalOp if (isa<tosa::ReciprocalOp>(op) && isa<FloatType>(elementTy)) { auto one = - rewriter.create<arith::ConstantOp>(loc, FloatAttr::get(elementTy, 1)); - return rewriter.create<arith::DivFOp>(loc, resultTypes, one, 
args[0]); + arith::ConstantOp::create(rewriter, loc, FloatAttr::get(elementTy, 1)); + return arith::DivFOp::create(rewriter, loc, resultTypes, one, args[0]); } // tosa::MulOp @@ -140,7 +139,8 @@ static Value createLinalgBodyCalculationForElementwiseOp( "Cannot have shift value for float"); return nullptr; } - return rewriter.create<arith::MulFOp>(loc, resultTypes, args[0], args[1]); + return arith::MulFOp::create(rewriter, loc, resultTypes, args[0], + args[1]); } if (isa<IntegerType>(elementTy)) { @@ -149,21 +149,21 @@ static Value createLinalgBodyCalculationForElementwiseOp( if (shift > 0) { auto shiftConst = - rewriter.create<arith::ConstantIntOp>(loc, shift, /*bitwidth=*/8); + arith::ConstantIntOp::create(rewriter, loc, shift, /*bitwidth=*/8); if (!a.getType().isInteger(32)) - a = rewriter.create<arith::ExtSIOp>(loc, rewriter.getI32Type(), a); + a = arith::ExtSIOp::create(rewriter, loc, rewriter.getI32Type(), a); if (!b.getType().isInteger(32)) - b = rewriter.create<arith::ExtSIOp>(loc, rewriter.getI32Type(), b); + b = arith::ExtSIOp::create(rewriter, loc, rewriter.getI32Type(), b); - auto result = rewriter.create<tosa::ApplyScaleOp>( - loc, rewriter.getI32Type(), a, b, shiftConst, + auto result = tosa::ApplyScaleOp::create( + rewriter, loc, rewriter.getI32Type(), a, b, shiftConst, rewriter.getStringAttr("SINGLE_ROUND")); if (elementTy.isInteger(32)) return result; - return rewriter.create<arith::TruncIOp>(loc, elementTy, result); + return arith::TruncIOp::create(rewriter, loc, elementTy, result); } int aWidth = a.getType().getIntOrFloatBitWidth(); @@ -171,11 +171,11 @@ static Value createLinalgBodyCalculationForElementwiseOp( int cWidth = resultTypes[0].getIntOrFloatBitWidth(); if (aWidth < cWidth) - a = rewriter.create<arith::ExtSIOp>(loc, resultTypes[0], a); + a = arith::ExtSIOp::create(rewriter, loc, resultTypes[0], a); if (bWidth < cWidth) - b = rewriter.create<arith::ExtSIOp>(loc, resultTypes[0], b); + b = arith::ExtSIOp::create(rewriter, loc, resultTypes[0], b); - return rewriter.create<arith::MulIOp>(loc, resultTypes, a, b); + return arith::MulIOp::create(rewriter, loc, resultTypes, a, b); } } @@ -201,14 +201,14 @@ static Value createLinalgBodyCalculationForElementwiseOp( int64_t outZp = *maybeOutZp; if (isa<FloatType>(elementTy)) - return rewriter.create<arith::NegFOp>(loc, resultTypes, args[0]); + return arith::NegFOp::create(rewriter, loc, resultTypes, args[0]); if (isa<IntegerType>(elementTy)) { if (!inZp && !outZp) { - auto constant = rewriter.create<arith::ConstantOp>( - loc, IntegerAttr::get(elementTy, 0)); - return rewriter.create<arith::SubIOp>(loc, resultTypes, constant, - args[0]); + auto constant = arith::ConstantOp::create( + rewriter, loc, IntegerAttr::get(elementTy, 0)); + return arith::SubIOp::create(rewriter, loc, resultTypes, constant, + args[0]); } // Compute the maximum value that can occur in the intermediate buffer. 
@@ -231,214 +231,214 @@ static Value createLinalgBodyCalculationForElementwiseOp( } Type intermediateType = rewriter.getIntegerType(intermediateBitWidth); - Value zpAddValue = rewriter.create<arith::ConstantOp>( - loc, rewriter.getIntegerAttr(intermediateType, zpAdd)); + Value zpAddValue = arith::ConstantOp::create( + rewriter, loc, rewriter.getIntegerAttr(intermediateType, zpAdd)); // The negation can be applied by doing: // outputValue = inZp + outZp - inputValue auto ext = - rewriter.create<arith::ExtSIOp>(loc, intermediateType, args[0]); - auto sub = rewriter.create<arith::SubIOp>(loc, zpAddValue, ext); + arith::ExtSIOp::create(rewriter, loc, intermediateType, args[0]); + auto sub = arith::SubIOp::create(rewriter, loc, zpAddValue, ext); // Clamp to the negation range. - Value min = rewriter.create<arith::ConstantIntOp>( - loc, intermediateType, + Value min = arith::ConstantIntOp::create( + rewriter, loc, intermediateType, APInt::getSignedMinValue(inputBitWidth).getSExtValue()); - Value max = rewriter.create<arith::ConstantIntOp>( - loc, intermediateType, + Value max = arith::ConstantIntOp::create( + rewriter, loc, intermediateType, APInt::getSignedMaxValue(inputBitWidth).getSExtValue()); auto clamp = clampIntHelper(loc, sub, min, max, rewriter, false); // Truncate to the final value. - return rewriter.create<arith::TruncIOp>(loc, elementTy, clamp); + return arith::TruncIOp::create(rewriter, loc, elementTy, clamp); } } // tosa::BitwiseAndOp if (isa<tosa::BitwiseAndOp>(op) && isa<IntegerType>(elementTy)) - return rewriter.create<arith::AndIOp>(loc, resultTypes, args); + return arith::AndIOp::create(rewriter, loc, resultTypes, args); // tosa::BitwiseOrOp if (isa<tosa::BitwiseOrOp>(op) && isa<IntegerType>(elementTy)) - return rewriter.create<arith::OrIOp>(loc, resultTypes, args); + return arith::OrIOp::create(rewriter, loc, resultTypes, args); // tosa::BitwiseNotOp if (isa<tosa::BitwiseNotOp>(op) && isa<IntegerType>(elementTy)) { auto allOnesAttr = rewriter.getIntegerAttr( elementTy, APInt::getAllOnes(elementTy.getIntOrFloatBitWidth())); - auto allOnes = rewriter.create<arith::ConstantOp>(loc, allOnesAttr); - return rewriter.create<arith::XOrIOp>(loc, resultTypes, args[0], allOnes); + auto allOnes = arith::ConstantOp::create(rewriter, loc, allOnesAttr); + return arith::XOrIOp::create(rewriter, loc, resultTypes, args[0], allOnes); } // tosa::BitwiseXOrOp if (isa<tosa::BitwiseXorOp>(op) && isa<IntegerType>(elementTy)) - return rewriter.create<arith::XOrIOp>(loc, resultTypes, args); + return arith::XOrIOp::create(rewriter, loc, resultTypes, args); // tosa::LogicalLeftShiftOp if (isa<tosa::LogicalLeftShiftOp>(op) && isa<IntegerType>(elementTy)) - return rewriter.create<arith::ShLIOp>(loc, resultTypes, args); + return arith::ShLIOp::create(rewriter, loc, resultTypes, args); // tosa::LogicalRightShiftOp if (isa<tosa::LogicalRightShiftOp>(op) && isa<IntegerType>(elementTy)) - return rewriter.create<arith::ShRUIOp>(loc, resultTypes, args); + return arith::ShRUIOp::create(rewriter, loc, resultTypes, args); // tosa::ArithmeticRightShiftOp if (isa<tosa::ArithmeticRightShiftOp>(op) && isa<IntegerType>(elementTy)) { - auto result = rewriter.create<arith::ShRSIOp>(loc, resultTypes, args); + auto result = arith::ShRSIOp::create(rewriter, loc, resultTypes, args); auto round = cast<BoolAttr>(op->getAttr("round")).getValue(); if (!round) { return result; } Type i1Ty = IntegerType::get(rewriter.getContext(), /*width=*/1); - auto one = - rewriter.create<arith::ConstantOp>(loc, IntegerAttr::get(elementTy, 1)); 
- auto zero = - rewriter.create<arith::ConstantOp>(loc, IntegerAttr::get(elementTy, 0)); + auto one = arith::ConstantOp::create(rewriter, loc, + IntegerAttr::get(elementTy, 1)); + auto zero = arith::ConstantOp::create(rewriter, loc, + IntegerAttr::get(elementTy, 0)); auto i1one = - rewriter.create<arith::ConstantOp>(loc, IntegerAttr::get(i1Ty, 1)); + arith::ConstantOp::create(rewriter, loc, IntegerAttr::get(i1Ty, 1)); // Checking that input2 != 0 - auto shiftValueGreaterThanZero = rewriter.create<arith::CmpIOp>( - loc, arith::CmpIPredicate::sgt, args[1], zero); + auto shiftValueGreaterThanZero = arith::CmpIOp::create( + rewriter, loc, arith::CmpIPredicate::sgt, args[1], zero); // Checking for the last bit of input1 to be 1 auto subtract = - rewriter.create<arith::SubIOp>(loc, resultTypes, args[1], one); + arith::SubIOp::create(rewriter, loc, resultTypes, args[1], one); auto shifted = - rewriter.create<arith::ShRSIOp>(loc, resultTypes, args[0], subtract) + arith::ShRSIOp::create(rewriter, loc, resultTypes, args[0], subtract) ->getResults(); - auto truncated = rewriter.create<arith::TruncIOp>( - loc, i1Ty, shifted, ArrayRef<NamedAttribute>()); + auto truncated = arith::TruncIOp::create(rewriter, loc, i1Ty, shifted, + ArrayRef<NamedAttribute>()); auto isInputOdd = - rewriter.create<arith::AndIOp>(loc, i1Ty, truncated, i1one); + arith::AndIOp::create(rewriter, loc, i1Ty, truncated, i1one); - auto shouldRound = rewriter.create<arith::AndIOp>( - loc, i1Ty, shiftValueGreaterThanZero, isInputOdd); + auto shouldRound = arith::AndIOp::create( + rewriter, loc, i1Ty, shiftValueGreaterThanZero, isInputOdd); auto extended = - rewriter.create<arith::ExtUIOp>(loc, resultTypes, shouldRound); - return rewriter.create<arith::AddIOp>(loc, resultTypes, result, extended); + arith::ExtUIOp::create(rewriter, loc, resultTypes, shouldRound); + return arith::AddIOp::create(rewriter, loc, resultTypes, result, extended); } // tosa::ClzOp if (isa<tosa::ClzOp>(op) && isa<IntegerType>(elementTy)) { - return rewriter.create<math::CountLeadingZerosOp>(loc, elementTy, args[0]); + return math::CountLeadingZerosOp::create(rewriter, loc, elementTy, args[0]); } // tosa::LogicalAnd if (isa<tosa::LogicalAndOp>(op) && elementTy.isInteger(1)) - return rewriter.create<arith::AndIOp>(loc, resultTypes, args); + return arith::AndIOp::create(rewriter, loc, resultTypes, args); // tosa::LogicalNot if (isa<tosa::LogicalNotOp>(op) && elementTy.isInteger(1)) { - auto one = rewriter.create<arith::ConstantOp>( - loc, rewriter.getIntegerAttr(elementTy, 1)); - return rewriter.create<arith::XOrIOp>(loc, resultTypes, args[0], one); + auto one = arith::ConstantOp::create(rewriter, loc, + rewriter.getIntegerAttr(elementTy, 1)); + return arith::XOrIOp::create(rewriter, loc, resultTypes, args[0], one); } // tosa::LogicalOr if (isa<tosa::LogicalOrOp>(op) && elementTy.isInteger(1)) - return rewriter.create<arith::OrIOp>(loc, resultTypes, args); + return arith::OrIOp::create(rewriter, loc, resultTypes, args); // tosa::LogicalXor if (isa<tosa::LogicalXorOp>(op) && elementTy.isInteger(1)) - return rewriter.create<arith::XOrIOp>(loc, resultTypes, args); + return arith::XOrIOp::create(rewriter, loc, resultTypes, args); // tosa::PowOp if (isa<tosa::PowOp>(op) && isa<FloatType>(elementTy)) - return rewriter.create<mlir::math::PowFOp>(loc, resultTypes, args); + return mlir::math::PowFOp::create(rewriter, loc, resultTypes, args); // tosa::RsqrtOp if (isa<tosa::RsqrtOp>(op) && isa<FloatType>(elementTy)) - return rewriter.create<mlir::math::RsqrtOp>(loc, 
resultTypes, args); + return mlir::math::RsqrtOp::create(rewriter, loc, resultTypes, args); // tosa::LogOp if (isa<tosa::LogOp>(op) && isa<FloatType>(elementTy)) - return rewriter.create<mlir::math::LogOp>(loc, resultTypes, args); + return mlir::math::LogOp::create(rewriter, loc, resultTypes, args); // tosa::ExpOp if (isa<tosa::ExpOp>(op) && isa<FloatType>(elementTy)) - return rewriter.create<mlir::math::ExpOp>(loc, resultTypes, args); + return mlir::math::ExpOp::create(rewriter, loc, resultTypes, args); // tosa::SinOp if (isa<tosa::SinOp>(op) && isa<FloatType>(elementTy)) - return rewriter.create<mlir::math::SinOp>(loc, resultTypes, args); + return mlir::math::SinOp::create(rewriter, loc, resultTypes, args); // tosa::CosOp if (isa<tosa::CosOp>(op) && isa<FloatType>(elementTy)) - return rewriter.create<mlir::math::CosOp>(loc, resultTypes, args); + return mlir::math::CosOp::create(rewriter, loc, resultTypes, args); // tosa::TanhOp if (isa<tosa::TanhOp>(op) && isa<FloatType>(elementTy)) - return rewriter.create<mlir::math::TanhOp>(loc, resultTypes, args); + return mlir::math::TanhOp::create(rewriter, loc, resultTypes, args); // tosa::ErfOp if (isa<tosa::ErfOp>(op) && llvm::isa<FloatType>(elementTy)) - return rewriter.create<mlir::math::ErfOp>(loc, resultTypes, args); + return mlir::math::ErfOp::create(rewriter, loc, resultTypes, args); // tosa::GreaterOp if (isa<tosa::GreaterOp>(op) && isa<FloatType>(elementTy)) - return rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OGT, - args[0], args[1]); + return arith::CmpFOp::create(rewriter, loc, arith::CmpFPredicate::OGT, + args[0], args[1]); if (isa<tosa::GreaterOp>(op) && elementTy.isSignlessInteger()) - return rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::sgt, - args[0], args[1]); + return arith::CmpIOp::create(rewriter, loc, arith::CmpIPredicate::sgt, + args[0], args[1]); // tosa::GreaterEqualOp if (isa<tosa::GreaterEqualOp>(op) && isa<FloatType>(elementTy)) - return rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OGE, - args[0], args[1]); + return arith::CmpFOp::create(rewriter, loc, arith::CmpFPredicate::OGE, + args[0], args[1]); if (isa<tosa::GreaterEqualOp>(op) && elementTy.isSignlessInteger()) - return rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::sge, - args[0], args[1]); + return arith::CmpIOp::create(rewriter, loc, arith::CmpIPredicate::sge, + args[0], args[1]); // tosa::EqualOp if (isa<tosa::EqualOp>(op) && isa<FloatType>(elementTy)) - return rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OEQ, - args[0], args[1]); + return arith::CmpFOp::create(rewriter, loc, arith::CmpFPredicate::OEQ, + args[0], args[1]); if (isa<tosa::EqualOp>(op) && elementTy.isSignlessInteger()) - return rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq, - args[0], args[1]); + return arith::CmpIOp::create(rewriter, loc, arith::CmpIPredicate::eq, + args[0], args[1]); // tosa::SelectOp if (isa<tosa::SelectOp>(op)) { elementTy = cast<ShapedType>(op->getOperand(1).getType()).getElementType(); if (isa<FloatType>(elementTy) || isa<IntegerType>(elementTy)) - return rewriter.create<arith::SelectOp>(loc, args[0], args[1], args[2]); + return arith::SelectOp::create(rewriter, loc, args[0], args[1], args[2]); } // tosa::MaximumOp if (isa<tosa::MaximumOp>(op) && isa<FloatType>(elementTy)) { - auto max = rewriter.create<arith::MaximumFOp>(loc, args[0], args[1]); + auto max = arith::MaximumFOp::create(rewriter, loc, args[0], args[1]); return materializeBinaryNanCheckIfRequired(llvm::cast<tosa::MaximumOp>(op), 
rewriter, args[0], args[1], max); } if (isa<tosa::MaximumOp>(op) && elementTy.isSignlessInteger()) { - return rewriter.create<arith::MaxSIOp>(loc, args[0], args[1]); + return arith::MaxSIOp::create(rewriter, loc, args[0], args[1]); } // tosa::MinimumOp if (isa<tosa::MinimumOp>(op) && isa<FloatType>(elementTy)) { - auto min = rewriter.create<arith::MinimumFOp>(loc, args[0], args[1]); + auto min = arith::MinimumFOp::create(rewriter, loc, args[0], args[1]); return materializeBinaryNanCheckIfRequired(llvm::cast<tosa::MinimumOp>(op), rewriter, args[0], args[1], min); } if (isa<tosa::MinimumOp>(op) && elementTy.isSignlessInteger()) { - return rewriter.create<arith::MinSIOp>(loc, args[0], args[1]); + return arith::MinSIOp::create(rewriter, loc, args[0], args[1]); } // tosa::CeilOp if (isa<tosa::CeilOp>(op) && isa<FloatType>(elementTy)) - return rewriter.create<math::CeilOp>(loc, resultTypes, args); + return math::CeilOp::create(rewriter, loc, resultTypes, args); // tosa::FloorOp if (isa<tosa::FloorOp>(op) && isa<FloatType>(elementTy)) - return rewriter.create<math::FloorOp>(loc, resultTypes, args); + return math::FloorOp::create(rewriter, loc, resultTypes, args); // tosa::ClampOp if (isa<tosa::ClampOp>(op) && isa<FloatType>(elementTy)) { @@ -449,10 +449,10 @@ static Value createLinalgBodyCalculationForElementwiseOp( APFloat::rmNearestTiesToEven, &losesInfo); maxApf.convert(cast<FloatType>(elementTy).getFloatSemantics(), APFloat::rmNearestTiesToEven, &losesInfo); - auto min = rewriter.create<arith::ConstantOp>( - loc, elementTy, rewriter.getFloatAttr(elementTy, minApf)); - auto max = rewriter.create<arith::ConstantOp>( - loc, elementTy, rewriter.getFloatAttr(elementTy, maxApf)); + auto min = arith::ConstantOp::create( + rewriter, loc, elementTy, rewriter.getFloatAttr(elementTy, minApf)); + auto max = arith::ConstantOp::create( + rewriter, loc, elementTy, rewriter.getFloatAttr(elementTy, maxApf)); auto result = clampFloatHelper(loc, args[0], min, max, rewriter); auto clampOp = llvm::cast<tosa::ClampOp>(op); @@ -478,11 +478,11 @@ static Value createLinalgBodyCalculationForElementwiseOp( // return init if x == NaN else result // Unordered comparison of NaN against itself will always return true. - Value isNaN = rewriter.create<arith::CmpFOp>( - op->getLoc(), arith::CmpFPredicate::UNO, args[0], args[0]); + Value isNaN = arith::CmpFOp::create( + rewriter, op->getLoc(), arith::CmpFPredicate::UNO, args[0], args[0]); // TOSA specifies that in "ignore" NaN mode the result is "min" if the input // is NaN. 
- return rewriter.create<arith::SelectOp>(op->getLoc(), isNaN, min, result); + return arith::SelectOp::create(rewriter, op->getLoc(), isNaN, min, result); } if (isa<tosa::ClampOp>(op) && isa<IntegerType>(elementTy)) { @@ -515,10 +515,10 @@ static Value createLinalgBodyCalculationForElementwiseOp( min = std::min(min, maxRepresentable); max = std::min(max, maxRepresentable); - auto minVal = rewriter.create<arith::ConstantIntOp>( - loc, min, intTy.getIntOrFloatBitWidth()); - auto maxVal = rewriter.create<arith::ConstantIntOp>( - loc, max, intTy.getIntOrFloatBitWidth()); + auto minVal = arith::ConstantIntOp::create(rewriter, loc, min, + intTy.getIntOrFloatBitWidth()); + auto maxVal = arith::ConstantIntOp::create(rewriter, loc, max, + intTy.getIntOrFloatBitWidth()); return clampIntHelper(loc, args[0], minVal, maxVal, rewriter, intTy.isUnsignedInteger()); } @@ -526,11 +526,11 @@ static Value createLinalgBodyCalculationForElementwiseOp( // tosa::SigmoidOp if (isa<tosa::SigmoidOp>(op) && isa<FloatType>(elementTy)) { auto one = - rewriter.create<arith::ConstantOp>(loc, FloatAttr::get(elementTy, 1)); - auto negate = rewriter.create<arith::NegFOp>(loc, resultTypes, args[0]); - auto exp = rewriter.create<mlir::math::ExpOp>(loc, resultTypes, negate); - auto added = rewriter.create<arith::AddFOp>(loc, resultTypes, exp, one); - return rewriter.create<arith::DivFOp>(loc, resultTypes, one, added); + arith::ConstantOp::create(rewriter, loc, FloatAttr::get(elementTy, 1)); + auto negate = arith::NegFOp::create(rewriter, loc, resultTypes, args[0]); + auto exp = mlir::math::ExpOp::create(rewriter, loc, resultTypes, negate); + auto added = arith::AddFOp::create(rewriter, loc, resultTypes, exp, one); + return arith::DivFOp::create(rewriter, loc, resultTypes, one, added); } // tosa::CastOp @@ -549,50 +549,49 @@ static Value createLinalgBodyCalculationForElementwiseOp( return args.front(); if (isa<FloatType>(srcTy) && isa<FloatType>(dstTy) && bitExtend) - return rewriter.create<arith::ExtFOp>(loc, resultTypes, args, - ArrayRef<NamedAttribute>()); + return arith::ExtFOp::create(rewriter, loc, resultTypes, args, + ArrayRef<NamedAttribute>()); if (isa<FloatType>(srcTy) && isa<FloatType>(dstTy) && !bitExtend) - return rewriter.create<arith::TruncFOp>(loc, resultTypes, args, - ArrayRef<NamedAttribute>()); + return arith::TruncFOp::create(rewriter, loc, resultTypes, args, + ArrayRef<NamedAttribute>()); // 1-bit integers need to be treated as signless. if (srcTy.isInteger(1) && arith::UIToFPOp::areCastCompatible(srcTy, dstTy)) - return rewriter.create<arith::UIToFPOp>(loc, resultTypes, args, - ArrayRef<NamedAttribute>()); + return arith::UIToFPOp::create(rewriter, loc, resultTypes, args, + ArrayRef<NamedAttribute>()); if (srcTy.isInteger(1) && isa<IntegerType>(dstTy) && bitExtend) - return rewriter.create<arith::ExtUIOp>(loc, resultTypes, args, - ArrayRef<NamedAttribute>()); + return arith::ExtUIOp::create(rewriter, loc, resultTypes, args, + ArrayRef<NamedAttribute>()); // Unsigned integers need an unrealized cast so that they can be passed // to UIToFP. 
if (srcTy.isUnsignedInteger() && isa<FloatType>(dstTy)) { auto unrealizedCast = - rewriter - .create<UnrealizedConversionCastOp>( - loc, rewriter.getIntegerType(srcTy.getIntOrFloatBitWidth()), - args[0]) + UnrealizedConversionCastOp::create( + rewriter, loc, + rewriter.getIntegerType(srcTy.getIntOrFloatBitWidth()), args[0]) .getResult(0); - return rewriter.create<arith::UIToFPOp>(loc, resultTypes[0], - unrealizedCast); + return arith::UIToFPOp::create(rewriter, loc, resultTypes[0], + unrealizedCast); } // All other si-to-fp conversions should be handled by SIToFP. if (arith::SIToFPOp::areCastCompatible(srcTy, dstTy)) - return rewriter.create<arith::SIToFPOp>(loc, resultTypes, args, - ArrayRef<NamedAttribute>()); + return arith::SIToFPOp::create(rewriter, loc, resultTypes, args, + ArrayRef<NamedAttribute>()); // Casting to boolean, floats need to only be checked as not-equal to zero. if (isa<FloatType>(srcTy) && dstTy.isInteger(1)) { - Value zero = rewriter.create<arith::ConstantOp>( - loc, rewriter.getFloatAttr(srcTy, 0.0)); - return rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::UNE, - args.front(), zero); + Value zero = arith::ConstantOp::create(rewriter, loc, + rewriter.getFloatAttr(srcTy, 0.0)); + return arith::CmpFOp::create(rewriter, loc, arith::CmpFPredicate::UNE, + args.front(), zero); } if (arith::FPToSIOp::areCastCompatible(srcTy, dstTy)) { - auto rounded = rewriter.create<math::RoundEvenOp>(loc, args[0]); + auto rounded = math::RoundEvenOp::create(rewriter, loc, args[0]); const auto &fltSemantics = cast<FloatType>(srcTy).getFloatSemantics(); // Check whether neither int min nor int max can be represented in the @@ -601,37 +600,42 @@ static Value createLinalgBodyCalculationForElementwiseOp( APFloat::semanticsMaxExponent(fltSemantics)) { // Use cmp + select to replace infinites by int min / int max. Other // integral values can be represented in the integer space. 
- auto conv = rewriter.create<arith::FPToSIOp>(loc, dstTy, rounded); - auto posInf = rewriter.create<arith::ConstantOp>( - loc, rewriter.getFloatAttr(getElementTypeOrSelf(srcTy), - APFloat::getInf(fltSemantics))); - auto negInf = rewriter.create<arith::ConstantOp>( - loc, rewriter.getFloatAttr( - getElementTypeOrSelf(srcTy), - APFloat::getInf(fltSemantics, /*Negative=*/true))); - auto overflow = rewriter.create<arith::CmpFOp>( - loc, arith::CmpFPredicate::UEQ, rounded, posInf); - auto underflow = rewriter.create<arith::CmpFOp>( - loc, arith::CmpFPredicate::UEQ, rounded, negInf); - auto intMin = rewriter.create<arith::ConstantOp>( - loc, rewriter.getIntegerAttr( - getElementTypeOrSelf(dstTy), - APInt::getSignedMinValue(dstTy.getIntOrFloatBitWidth()))); - auto intMax = rewriter.create<arith::ConstantOp>( - loc, rewriter.getIntegerAttr( - getElementTypeOrSelf(dstTy), - APInt::getSignedMaxValue(dstTy.getIntOrFloatBitWidth()))); + auto conv = arith::FPToSIOp::create(rewriter, loc, dstTy, rounded); + auto posInf = arith::ConstantOp::create( + rewriter, loc, + rewriter.getFloatAttr(getElementTypeOrSelf(srcTy), + APFloat::getInf(fltSemantics))); + auto negInf = arith::ConstantOp::create( + rewriter, loc, + rewriter.getFloatAttr( + getElementTypeOrSelf(srcTy), + APFloat::getInf(fltSemantics, /*Negative=*/true))); + auto overflow = arith::CmpFOp::create( + rewriter, loc, arith::CmpFPredicate::UEQ, rounded, posInf); + auto underflow = arith::CmpFOp::create( + rewriter, loc, arith::CmpFPredicate::UEQ, rounded, negInf); + auto intMin = arith::ConstantOp::create( + rewriter, loc, + rewriter.getIntegerAttr( + getElementTypeOrSelf(dstTy), + APInt::getSignedMinValue(dstTy.getIntOrFloatBitWidth()))); + auto intMax = arith::ConstantOp::create( + rewriter, loc, + rewriter.getIntegerAttr( + getElementTypeOrSelf(dstTy), + APInt::getSignedMaxValue(dstTy.getIntOrFloatBitWidth()))); auto maxClamped = - rewriter.create<arith::SelectOp>(loc, overflow, intMax, conv); - return rewriter.create<arith::SelectOp>(loc, underflow, intMin, - maxClamped); + arith::SelectOp::create(rewriter, loc, overflow, intMax, conv); + return arith::SelectOp::create(rewriter, loc, underflow, intMin, + maxClamped); } - auto intMinFP = rewriter.create<arith::ConstantOp>( - loc, rewriter.getFloatAttr( - getElementTypeOrSelf(srcTy), - APInt::getSignedMinValue(dstTy.getIntOrFloatBitWidth()) - .getSExtValue())); + auto intMinFP = arith::ConstantOp::create( + rewriter, loc, + rewriter.getFloatAttr( + getElementTypeOrSelf(srcTy), + APInt::getSignedMinValue(dstTy.getIntOrFloatBitWidth()) + .getSExtValue())); // Check whether the mantissa has enough bits to represent int max. if (cast<FloatType>(srcTy).getFPMantissaWidth() >= @@ -640,58 +644,61 @@ static Value createLinalgBodyCalculationForElementwiseOp( // consists of a single leading bit. Therefore we can clamp the input // in the floating-point domain. 
- auto intMaxFP = rewriter.create<arith::ConstantOp>( - loc, rewriter.getFloatAttr( - getElementTypeOrSelf(srcTy), - APInt::getSignedMaxValue(dstTy.getIntOrFloatBitWidth()) - .getSExtValue())); + auto intMaxFP = arith::ConstantOp::create( + rewriter, loc, + rewriter.getFloatAttr( + getElementTypeOrSelf(srcTy), + APInt::getSignedMaxValue(dstTy.getIntOrFloatBitWidth()) + .getSExtValue())); Value clamped = clampFloatHelper(loc, rounded, intMinFP, intMaxFP, rewriter); - return rewriter.create<arith::FPToSIOp>(loc, dstTy, clamped); + return arith::FPToSIOp::create(rewriter, loc, dstTy, clamped); } // Due to earlier check we know exponant range is big enough to represent // int min. We can therefore rely on int max + 1 being representable as // well because it's just int min with a positive sign. So clamp the min // value and compare against that to select the max int value if needed. - auto intMaxPlusOneFP = rewriter.create<arith::ConstantOp>( - loc, rewriter.getFloatAttr( - getElementTypeOrSelf(srcTy), - static_cast<double>( - APInt::getSignedMaxValue(dstTy.getIntOrFloatBitWidth()) - .getSExtValue()) + - 1.0f)); - - auto intMax = rewriter.create<arith::ConstantOp>( - loc, rewriter.getIntegerAttr( - getElementTypeOrSelf(dstTy), - APInt::getSignedMaxValue(dstTy.getIntOrFloatBitWidth()))); + auto intMaxPlusOneFP = arith::ConstantOp::create( + rewriter, loc, + rewriter.getFloatAttr( + getElementTypeOrSelf(srcTy), + static_cast<double>( + APInt::getSignedMaxValue(dstTy.getIntOrFloatBitWidth()) + .getSExtValue()) + + 1.0f)); + + auto intMax = arith::ConstantOp::create( + rewriter, loc, + rewriter.getIntegerAttr( + getElementTypeOrSelf(dstTy), + APInt::getSignedMaxValue(dstTy.getIntOrFloatBitWidth()))); auto minClampedFP = - rewriter.create<arith::MaximumFOp>(loc, rounded, intMinFP); + arith::MaximumFOp::create(rewriter, loc, rounded, intMinFP); auto minClamped = - rewriter.create<arith::FPToSIOp>(loc, dstTy, minClampedFP); - auto overflow = rewriter.create<arith::CmpFOp>( - loc, arith::CmpFPredicate::UGE, rounded, intMaxPlusOneFP); - return rewriter.create<arith::SelectOp>(loc, overflow, intMax, - minClamped); + arith::FPToSIOp::create(rewriter, loc, dstTy, minClampedFP); + auto overflow = arith::CmpFOp::create( + rewriter, loc, arith::CmpFPredicate::UGE, rounded, intMaxPlusOneFP); + return arith::SelectOp::create(rewriter, loc, overflow, intMax, + minClamped); } // Casting to boolean, integers need to only be checked as not-equal to // zero. 
if (isa<IntegerType>(srcTy) && dstTy.isInteger(1)) { - Value zero = rewriter.create<arith::ConstantIntOp>( - loc, 0, srcTy.getIntOrFloatBitWidth()); - return rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::ne, - args.front(), zero); + Value zero = arith::ConstantIntOp::create(rewriter, loc, 0, + srcTy.getIntOrFloatBitWidth()); + return arith::CmpIOp::create(rewriter, loc, arith::CmpIPredicate::ne, + args.front(), zero); } if (isa<IntegerType>(srcTy) && isa<IntegerType>(dstTy) && bitExtend) - return rewriter.create<arith::ExtSIOp>(loc, resultTypes, args, - ArrayRef<NamedAttribute>()); + return arith::ExtSIOp::create(rewriter, loc, resultTypes, args, + ArrayRef<NamedAttribute>()); if (isa<IntegerType>(srcTy) && isa<IntegerType>(dstTy) && !bitExtend) { - return rewriter.create<arith::TruncIOp>(loc, dstTy, args[0]); + return arith::TruncIOp::create(rewriter, loc, dstTy, args[0]); } } @@ -710,14 +717,14 @@ static Value createIndex(PatternRewriter &rewriter, Location loc, auto [it, inserted] = indexPool.try_emplace(index); if (inserted) it->second = - rewriter.create<arith::ConstantOp>(loc, rewriter.getIndexAttr(index)); + arith::ConstantOp::create(rewriter, loc, rewriter.getIndexAttr(index)); return it->second; } static Value getTensorDim(PatternRewriter &rewriter, Location loc, IndexPool &indexPool, Value tensor, int64_t index) { auto indexValue = createIndex(rewriter, loc, indexPool, index); - return rewriter.create<tensor::DimOp>(loc, tensor, indexValue).getResult(); + return tensor::DimOp::create(rewriter, loc, tensor, indexValue).getResult(); } static OpFoldResult getOrFoldTensorDim(PatternRewriter &rewriter, Location loc, @@ -783,7 +790,7 @@ computeTargetSize(PatternRewriter &rewriter, Location loc, IndexPool &indexPool, for (size_t i = 1; i < operandsWithDynamicDim.size(); i++) { auto nextSize = getTensorDim(rewriter, loc, indexPool, operandsWithDynamicDim[i], dim); - targetSize = rewriter.create<arith::MaxUIOp>(loc, targetSize, nextSize); + targetSize = arith::MaxUIOp::create(rewriter, loc, targetSize, nextSize); } return {targetSize, nullptr}; } @@ -838,8 +845,8 @@ static Value broadcastDynamicDimension(PatternRewriter &rewriter, Location loc, // Check if broadcast is necessary auto one = createIndex(rewriter, loc, indexPool, 1); auto runtimeSize = getTensorDim(rewriter, loc, indexPool, operand, dim); - auto broadcastNecessary = rewriter.create<arith::CmpIOp>( - loc, arith::CmpIPredicate::eq, runtimeSize, one); + auto broadcastNecessary = arith::CmpIOp::create( + rewriter, loc, arith::CmpIPredicate::eq, runtimeSize, one); // Emit 'then' region of 'scf.if' auto emitThenRegion = [&](OpBuilder &opBuilder, Location loc) { @@ -855,19 +862,18 @@ static Value broadcastDynamicDimension(PatternRewriter &rewriter, Location loc, operand, index); outputTensorShape.push_back(size); } - Value outputTensor = opBuilder.create<tensor::EmptyOp>( - loc, outputTensorShape, rankedTensorType.getElementType()); + Value outputTensor = tensor::EmptyOp::create( + opBuilder, loc, outputTensorShape, rankedTensorType.getElementType()); // Emit 'linalg.generic' op auto resultTensor = - opBuilder - .create<linalg::GenericOp>( - loc, outputTensor.getType(), operand, outputTensor, affineMaps, - getNParallelLoopsAttrs(rank), - [&](OpBuilder &opBuilder, Location loc, ValueRange blockArgs) { - // Emit 'linalg.yield' op - opBuilder.create<linalg::YieldOp>(loc, blockArgs.front()); - }) + linalg::GenericOp::create( + opBuilder, loc, outputTensor.getType(), operand, outputTensor, + affineMaps, 
getNParallelLoopsAttrs(rank), + [&](OpBuilder &opBuilder, Location loc, ValueRange blockArgs) { + // Emit 'linalg.yield' op + linalg::YieldOp::create(opBuilder, loc, blockArgs.front()); + }) .getResult(0); // Cast to original operand type if necessary @@ -875,17 +881,17 @@ static Value broadcastDynamicDimension(PatternRewriter &rewriter, Location loc, loc, operand.getType(), resultTensor); // Emit 'scf.yield' op - opBuilder.create<scf::YieldOp>(loc, castResultTensor); + scf::YieldOp::create(opBuilder, loc, castResultTensor); }; // Emit 'else' region of 'scf.if' auto emitElseRegion = [&](OpBuilder &opBuilder, Location loc) { - opBuilder.create<scf::YieldOp>(loc, operand); + scf::YieldOp::create(opBuilder, loc, operand); }; // Emit 'scf.if' op - auto ifOp = rewriter.create<scf::IfOp>(loc, broadcastNecessary, - emitThenRegion, emitElseRegion); + auto ifOp = scf::IfOp::create(rewriter, loc, broadcastNecessary, + emitThenRegion, emitElseRegion); return ifOp.getResult(0); } @@ -930,8 +936,8 @@ emitElementwiseComputation(ConversionPatternRewriter &rewriter, Location loc, if (!resultType) { return rewriter.notifyMatchFailure(operation, "failed to convert type"); } - Value outputTensor = rewriter.create<tensor::EmptyOp>( - loc, targetShape, resultType.getElementType()); + Value outputTensor = tensor::EmptyOp::create(rewriter, loc, targetShape, + resultType.getElementType()); // Create affine maps. Input affine maps broadcast static dimensions of size // 1. The output affine map is an identity map. @@ -957,8 +963,8 @@ emitElementwiseComputation(ConversionPatternRewriter &rewriter, Location loc, // Emit 'linalg.generic' op bool encounteredError = false; - auto linalgOp = rewriter.create<linalg::GenericOp>( - loc, outputTensor.getType(), operands, outputTensor, affineMaps, + auto linalgOp = linalg::GenericOp::create( + rewriter, loc, outputTensor.getType(), operands, outputTensor, affineMaps, getNParallelLoopsAttrs(rank), [&](OpBuilder &opBuilder, Location loc, ValueRange blockArgs) { Value opResult = createLinalgBodyCalculationForElementwiseOp( @@ -968,7 +974,7 @@ emitElementwiseComputation(ConversionPatternRewriter &rewriter, Location loc, encounteredError = true; return; } - opBuilder.create<linalg::YieldOp>(loc, opResult); + linalg::YieldOp::create(opBuilder, loc, opResult); }); if (encounteredError) return rewriter.notifyMatchFailure( @@ -1078,42 +1084,42 @@ static Value createLinalgBodyCalculationForReduceOp(Operation *op, PatternRewriter &rewriter) { Location loc = op->getLoc(); if (isa<tosa::ReduceSumOp>(op) && isa<FloatType>(elementTy)) { - return rewriter.create<arith::AddFOp>(loc, args); + return arith::AddFOp::create(rewriter, loc, args); } if (isa<tosa::ReduceSumOp>(op) && isa<IntegerType>(elementTy)) { - return rewriter.create<arith::AddIOp>(loc, args); + return arith::AddIOp::create(rewriter, loc, args); } if (isa<tosa::ReduceProductOp>(op) && isa<FloatType>(elementTy)) { - return rewriter.create<arith::MulFOp>(loc, args); + return arith::MulFOp::create(rewriter, loc, args); } if (isa<tosa::ReduceProductOp>(op) && isa<IntegerType>(elementTy)) { - return rewriter.create<arith::MulIOp>(loc, args); + return arith::MulIOp::create(rewriter, loc, args); } if (isa<tosa::ReduceMinOp>(op) && isa<FloatType>(elementTy)) { - return rewriter.create<arith::MinimumFOp>(loc, args[0], args[1]); + return arith::MinimumFOp::create(rewriter, loc, args[0], args[1]); } if (isa<tosa::ReduceMinOp>(op) && isa<IntegerType>(elementTy)) { - return rewriter.create<arith::MinSIOp>(loc, args[0], args[1]); + return 
arith::MinSIOp::create(rewriter, loc, args[0], args[1]); } if (isa<tosa::ReduceMaxOp>(op) && isa<FloatType>(elementTy)) { - return rewriter.create<arith::MaximumFOp>(loc, args[0], args[1]); + return arith::MaximumFOp::create(rewriter, loc, args[0], args[1]); } if (isa<tosa::ReduceMaxOp>(op) && isa<IntegerType>(elementTy)) { - return rewriter.create<arith::MaxSIOp>(loc, args[0], args[1]); + return arith::MaxSIOp::create(rewriter, loc, args[0], args[1]); } if (isa<tosa::ReduceAllOp>(op) && elementTy.isInteger(1)) - return rewriter.create<arith::AndIOp>(loc, args); + return arith::AndIOp::create(rewriter, loc, args); if (isa<tosa::ReduceAnyOp>(op) && elementTy.isInteger(1)) - return rewriter.create<arith::OrIOp>(loc, args); + return arith::OrIOp::create(rewriter, loc, args); return {}; } @@ -1139,7 +1145,7 @@ static LogicalResult reduceMatchAndRewriteHelper(OpTy op, uint64_t axis, if (axis != i) { reduceShape.push_back(inputTy.getDimSize(i)); if (inputTy.isDynamicDim(i)) - dynDims.push_back(rewriter.create<tensor::DimOp>(loc, input, i)); + dynDims.push_back(tensor::DimOp::create(rewriter, loc, input, i)); } } @@ -1147,22 +1153,20 @@ static LogicalResult reduceMatchAndRewriteHelper(OpTy op, uint64_t axis, inputs.push_back(input); // First fill the output buffer with the init value. - auto emptyTensor = - rewriter - .create<tensor::EmptyOp>(loc, reduceShape, resultTy.getElementType(), - dynDims) - .getResult(); + auto emptyTensor = tensor::EmptyOp::create(rewriter, loc, reduceShape, + resultTy.getElementType(), dynDims) + .getResult(); auto fillValueAttr = createInitialValueForReduceOp(op, elementTy, rewriter); if (!fillValueAttr) return rewriter.notifyMatchFailure( op, "No initial value found for reduction operation"); - auto fillValue = rewriter.create<arith::ConstantOp>(loc, fillValueAttr); - auto filledTensor = rewriter - .create<linalg::FillOp>(loc, ValueRange{fillValue}, - ValueRange{emptyTensor}) - .result(); + auto fillValue = arith::ConstantOp::create(rewriter, loc, fillValueAttr); + auto filledTensor = + linalg::FillOp::create(rewriter, loc, ValueRange{fillValue}, + ValueRange{emptyTensor}) + .result(); outputs.push_back(filledTensor); bool isNanIgnoreMode = false; @@ -1176,16 +1180,14 @@ static LogicalResult reduceMatchAndRewriteHelper(OpTy op, uint64_t axis, // Additionally we have to keep track of whether we've seen any non-NaN // values and then do a final select based on this predicate. 
auto trueAttr = rewriter.getBoolAttr(true); - auto trueValue = rewriter.create<arith::ConstantOp>(loc, trueAttr); + auto trueValue = arith::ConstantOp::create(rewriter, loc, trueAttr); auto emptyBoolTensor = - rewriter - .create<tensor::EmptyOp>(loc, reduceShape, trueValue.getType(), - dynDims) + tensor::EmptyOp::create(rewriter, loc, reduceShape, + trueValue.getType(), dynDims) .getResult(); auto allResultsNaNTensor = - rewriter - .create<linalg::FillOp>(loc, ValueRange{trueValue}, - ValueRange{emptyBoolTensor}) + linalg::FillOp::create(rewriter, loc, ValueRange{trueValue}, + ValueRange{emptyBoolTensor}) .result(); // Note that because the linalg::ReduceOp has two variadic arguments // (inputs and outputs) and it has the SameVariadicOperandSize trait we @@ -1202,8 +1204,8 @@ static LogicalResult reduceMatchAndRewriteHelper(OpTy op, uint64_t axis, } bool didEncounterError = false; - linalg::LinalgOp linalgOp = rewriter.create<linalg::ReduceOp>( - loc, inputs, outputs, axis, + linalg::LinalgOp linalgOp = linalg::ReduceOp::create( + rewriter, loc, inputs, outputs, axis, [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange blockArgs) { std::array<Value, 2> binaryArgs{ blockArgs[0], isNanIgnoreMode ? blockArgs[2] : blockArgs[1]}; @@ -1219,21 +1221,22 @@ static LogicalResult reduceMatchAndRewriteHelper(OpTy op, uint64_t axis, auto oldAllResultsNanFlagValue = blockArgs[3]; // Unordered comparison of NaN against itself will always return true. - Value isNaN = nestedBuilder.create<arith::CmpFOp>( - op->getLoc(), arith::CmpFPredicate::UNO, inputValue, inputValue); + Value isNaN = arith::CmpFOp::create(nestedBuilder, op->getLoc(), + arith::CmpFPredicate::UNO, + inputValue, inputValue); // If we've encountered a NaN, take the non-NaN value. - auto selectOp = nestedBuilder.create<arith::SelectOp>( - op->getLoc(), isNaN, initialValue, result); + auto selectOp = arith::SelectOp::create(nestedBuilder, op->getLoc(), + isNaN, initialValue, result); // Update the flag which keeps track of whether we have seen a non-NaN // value. - auto newAllResultsNanFlagValue = nestedBuilder.create<arith::AndIOp>( - op->getLoc(), oldAllResultsNanFlagValue, isNaN); + auto newAllResultsNanFlagValue = arith::AndIOp::create( + nestedBuilder, op->getLoc(), oldAllResultsNanFlagValue, isNaN); resultsToYield.push_back(selectOp); resultsToYield.push_back(newAllResultsNanFlagValue); } else { resultsToYield.push_back(result); } - nestedBuilder.create<linalg::YieldOp>(loc, resultsToYield); + linalg::YieldOp::create(nestedBuilder, loc, resultsToYield); }); if (!didEncounterError) @@ -1250,24 +1253,21 @@ static LogicalResult reduceMatchAndRewriteHelper(OpTy op, uint64_t axis, auto nanValueAttr = rewriter.getFloatAttr( elementTy, APFloat::getNaN(cast<FloatType>(elementTy).getFloatSemantics(), false)); - auto nanValue = rewriter.create<arith::ConstantOp>(loc, nanValueAttr); + auto nanValue = arith::ConstantOp::create(rewriter, loc, nanValueAttr); auto emptyNanTensor = - rewriter - .create<tensor::EmptyOp>(loc, reduceShape, - resultTy.getElementType(), dynDims) + tensor::EmptyOp::create(rewriter, loc, reduceShape, + resultTy.getElementType(), dynDims) .getResult(); auto nanFilledTensor = - rewriter - .create<linalg::FillOp>(loc, ValueRange{nanValue}, - ValueRange{emptyNanTensor}) + linalg::FillOp::create(rewriter, loc, ValueRange{nanValue}, + ValueRange{emptyNanTensor}) .result(); // Create an empty tensor, non need to fill this since it will be // overwritten by the select. 
auto finalEmptyTensor = - rewriter - .create<tensor::EmptyOp>(loc, reduceShape, - resultTy.getElementType(), dynDims) + tensor::EmptyOp::create(rewriter, loc, reduceShape, + resultTy.getElementType(), dynDims) .getResult(); // Do a selection between the tensors akin to: @@ -1278,7 +1278,7 @@ static LogicalResult reduceMatchAndRewriteHelper(OpTy op, uint64_t axis, ins.push_back(linalgOp->getResult(0)); outs.push_back(finalEmptyTensor); auto linalgSelect = - rewriter.create<linalg::SelectOp>(op->getLoc(), ins, outs); + linalg::SelectOp::create(rewriter, op->getLoc(), ins, outs); linalgOp = linalgSelect; } @@ -1350,7 +1350,7 @@ public: SmallVector<Value> dynDims; for (int i = 0; i < outputTy.getRank(); i++) { if (outputTy.isDynamicDim(i)) { - dynDims.push_back(rewriter.create<tensor::DimOp>(loc, input, i)); + dynDims.push_back(tensor::DimOp::create(rewriter, loc, input, i)); } } @@ -1401,16 +1401,17 @@ public: Value multiplierConstant; int64_t multiplierArg = 0; if (multiplierValues.size() == 1) { - multiplierConstant = rewriter.create<arith::ConstantOp>( - loc, rewriter.getI32IntegerAttr(multiplierValues.front())); + multiplierConstant = arith::ConstantOp::create( + rewriter, loc, rewriter.getI32IntegerAttr(multiplierValues.front())); } else { SmallVector<AffineExpr, 2> multiplierExprs{ rewriter.getAffineDimExpr(rank - 1)}; auto multiplierType = RankedTensorType::get({static_cast<int64_t>(multiplierValues.size())}, rewriter.getI32Type()); - genericInputs.push_back(rewriter.create<arith::ConstantOp>( - loc, DenseIntElementsAttr::get(multiplierType, multiplierValues))); + genericInputs.push_back(arith::ConstantOp::create( + rewriter, loc, + DenseIntElementsAttr::get(multiplierType, multiplierValues))); indexingMaps.push_back(AffineMap::get(/*dimCount=*/rank, /*symbolCount=*/0, multiplierExprs, @@ -1424,16 +1425,16 @@ public: Value shiftConstant; int64_t shiftArg = 0; if (shiftValues.size() == 1) { - shiftConstant = rewriter.create<arith::ConstantOp>( - loc, rewriter.getI8IntegerAttr(shiftValues.front())); + shiftConstant = arith::ConstantOp::create( + rewriter, loc, rewriter.getI8IntegerAttr(shiftValues.front())); } else { SmallVector<AffineExpr, 2> shiftExprs = { rewriter.getAffineDimExpr(rank - 1)}; auto shiftType = RankedTensorType::get({static_cast<int64_t>(shiftValues.size())}, rewriter.getIntegerType(8)); - genericInputs.push_back(rewriter.create<arith::ConstantOp>( - loc, DenseIntElementsAttr::get(shiftType, shiftValues))); + genericInputs.push_back(arith::ConstantOp::create( + rewriter, loc, DenseIntElementsAttr::get(shiftType, shiftValues))); indexingMaps.push_back(AffineMap::get(/*dimCount=*/rank, /*symbolCount=*/0, shiftExprs, rewriter.getContext())); @@ -1444,13 +1445,13 @@ public: indexingMaps.push_back(rewriter.getMultiDimIdentityMap(rank)); // Construct the indexing maps needed for linalg.generic ops. 
- Value emptyTensor = rewriter.create<tensor::EmptyOp>( - loc, outputTy.getShape(), outputTy.getElementType(), + Value emptyTensor = tensor::EmptyOp::create( + rewriter, loc, outputTy.getShape(), outputTy.getElementType(), ArrayRef<Value>({dynDims})); - auto linalgOp = rewriter.create<linalg::GenericOp>( - loc, outputTy, genericInputs, ValueRange{emptyTensor}, indexingMaps, - getNParallelLoopsAttrs(rank), + auto linalgOp = linalg::GenericOp::create( + rewriter, loc, outputTy, genericInputs, ValueRange{emptyTensor}, + indexingMaps, getNParallelLoopsAttrs(rank), [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange blockArgs) { Value value = blockArgs[0]; @@ -1466,9 +1467,10 @@ public: const int32_t inBitwidth = valueTy.getIntOrFloatBitWidth(); // Extend zeropoint for sub-32bits widths. const int32_t inAttrBitwidth = inBitwidth > 32 ? inBitwidth : 32; - auto inputZp = nestedBuilder.create<arith::ConstantOp>( - loc, IntegerAttr::get(rewriter.getIntegerType(inAttrBitwidth), - *maybeIZp)); + auto inputZp = arith::ConstantOp::create( + nestedBuilder, loc, + IntegerAttr::get(rewriter.getIntegerType(inAttrBitwidth), + *maybeIZp)); FailureOr<int64_t> maybeOZp = op.getOutputZeroPoint(); if (failed(maybeOZp)) { @@ -1482,43 +1484,43 @@ public: unsigned outBitWidth = outIntType.getWidth(); const int32_t outAttrBitwidth = 32; assert(outBitWidth <= 32 && "Unexpected output zeropoint bitwidth"); - auto outputZp = nestedBuilder.create<arith::ConstantOp>( - loc, IntegerAttr::get(rewriter.getIntegerType(outAttrBitwidth), - *maybeOZp)); + auto outputZp = arith::ConstantOp::create( + nestedBuilder, loc, + IntegerAttr::get(rewriter.getIntegerType(outAttrBitwidth), + *maybeOZp)); Value multiplier = multiplierConstant ? multiplierConstant : blockArgs[multiplierArg]; Value shift = shiftConstant ? shiftConstant : blockArgs[shiftArg]; if (valueTy.isUnsignedInteger()) { - value = nestedBuilder - .create<UnrealizedConversionCastOp>( - nestedLoc, - nestedBuilder.getIntegerType( - valueTy.getIntOrFloatBitWidth()), - value) + value = UnrealizedConversionCastOp::create( + nestedBuilder, nestedLoc, + nestedBuilder.getIntegerType( + valueTy.getIntOrFloatBitWidth()), + value) .getResult(0); } if (valueTy.getIntOrFloatBitWidth() < 32) { if (op.getInputUnsigned()) { - value = nestedBuilder.create<arith::ExtUIOp>( - nestedLoc, nestedBuilder.getI32Type(), value); + value = arith::ExtUIOp::create(nestedBuilder, nestedLoc, + nestedBuilder.getI32Type(), value); } else { - value = nestedBuilder.create<arith::ExtSIOp>( - nestedLoc, nestedBuilder.getI32Type(), value); + value = arith::ExtSIOp::create(nestedBuilder, nestedLoc, + nestedBuilder.getI32Type(), value); } } value = - nestedBuilder.create<arith::SubIOp>(nestedLoc, value, inputZp); + arith::SubIOp::create(nestedBuilder, nestedLoc, value, inputZp); - value = nestedBuilder.create<tosa::ApplyScaleOp>( - loc, nestedBuilder.getI32Type(), value, multiplier, shift, - roundingMode); + value = tosa::ApplyScaleOp::create(nestedBuilder, loc, + nestedBuilder.getI32Type(), value, + multiplier, shift, roundingMode); // Move to the new zero-point. value = - nestedBuilder.create<arith::AddIOp>(nestedLoc, value, outputZp); + arith::AddIOp::create(nestedBuilder, nestedLoc, value, outputZp); // Saturate to the output size. 
int32_t intMin = APInt::getSignedMinValue(outBitWidth).getSExtValue(); @@ -1530,27 +1532,26 @@ public: intMax = APInt::getMaxValue(outBitWidth).getZExtValue(); } - auto intMinVal = nestedBuilder.create<arith::ConstantOp>( - loc, nestedBuilder.getI32IntegerAttr(intMin)); - auto intMaxVal = nestedBuilder.create<arith::ConstantOp>( - loc, nestedBuilder.getI32IntegerAttr(intMax)); + auto intMinVal = arith::ConstantOp::create( + nestedBuilder, loc, nestedBuilder.getI32IntegerAttr(intMin)); + auto intMaxVal = arith::ConstantOp::create( + nestedBuilder, loc, nestedBuilder.getI32IntegerAttr(intMax)); value = clampIntHelper(nestedLoc, value, intMinVal, intMaxVal, nestedBuilder, /*isUnsigned=*/false); if (outIntType.getWidth() < 32) { - value = nestedBuilder.create<arith::TruncIOp>( - nestedLoc, rewriter.getIntegerType(outIntType.getWidth()), - value); + value = arith::TruncIOp::create( + nestedBuilder, nestedLoc, + rewriter.getIntegerType(outIntType.getWidth()), value); } if (outIntType.isUnsignedInteger()) { - value = nestedBuilder - .create<UnrealizedConversionCastOp>(nestedLoc, - outIntType, value) + value = UnrealizedConversionCastOp::create(nestedBuilder, nestedLoc, + outIntType, value) .getResult(0); } - nestedBuilder.create<linalg::YieldOp>(loc, value); + linalg::YieldOp::create(nestedBuilder, loc, value); }); rewriter.replaceOp(op, linalgOp->getResults()); @@ -1608,48 +1609,49 @@ public: auto collapseTy = RankedTensorType::get({inputTy.getDimSize(0), inputTy.getDimSize(3)}, inputTy.getElementType()); - Value collapse = builder.create<tensor::CollapseShapeOp>(collapseTy, input, - reassociationMap); + Value collapse = tensor::CollapseShapeOp::create(builder, collapseTy, input, + reassociationMap); // Get any dynamic shapes that appear in the input format. llvm::SmallVector<Value> outputDynSize; if (inputTy.isDynamicDim(0)) - outputDynSize.push_back(builder.create<tensor::DimOp>(input, 0)); + outputDynSize.push_back(tensor::DimOp::create(builder, input, 0)); if (inputTy.isDynamicDim(3)) - outputDynSize.push_back(builder.create<tensor::DimOp>(input, 3)); + outputDynSize.push_back(tensor::DimOp::create(builder, input, 3)); // Generate the elementwise operation for casting scaling the input value. auto genericTy = collapseTy.clone(resultTy.getElementType()); - Value empty = builder.create<tensor::EmptyOp>( - genericTy.getShape(), resultTy.getElementType(), outputDynSize); + Value empty = + tensor::EmptyOp::create(builder, genericTy.getShape(), + resultTy.getElementType(), outputDynSize); auto genericMap = rewriter.getMultiDimIdentityMap(genericTy.getRank()); SmallVector<utils::IteratorType> iterators(genericTy.getRank(), utils::IteratorType::parallel); - auto generic = builder.create<linalg::GenericOp>( - genericTy, ValueRange{collapse}, ValueRange{empty}, + auto generic = linalg::GenericOp::create( + builder, genericTy, ValueRange{collapse}, ValueRange{empty}, ArrayRef<AffineMap>{genericMap, genericMap}, iterators, [=](OpBuilder &b, Location loc, ValueRange args) { Value value = args[0]; // This is the quantized case. 
if (inputTy.getElementType() != resultTy.getElementType()) { - value = - b.create<arith::ExtSIOp>(loc, resultTy.getElementType(), value); + value = arith::ExtSIOp::create(b, loc, resultTy.getElementType(), + value); if (isBilinear && scale[0] != 0) { - Value scaleY = b.create<arith::ConstantOp>( - loc, b.getI32IntegerAttr(scale[0])); - value = b.create<arith::MulIOp>(loc, value, scaleY); + Value scaleY = arith::ConstantOp::create( + b, loc, b.getI32IntegerAttr(scale[0])); + value = arith::MulIOp::create(b, loc, value, scaleY); } if (isBilinear && scale[2] != 0) { - Value scaleX = b.create<arith::ConstantOp>( - loc, b.getI32IntegerAttr(scale[2])); - value = b.create<arith::MulIOp>(loc, value, scaleX); + Value scaleX = arith::ConstantOp::create( + b, loc, b.getI32IntegerAttr(scale[2])); + value = arith::MulIOp::create(b, loc, value, scaleX); } } - b.create<linalg::YieldOp>(loc, value); + linalg::YieldOp::create(b, loc, value); }); rewriter.replaceOpWithNewOp<tensor::ExpandShapeOp>( @@ -1697,9 +1699,9 @@ public: resizeShape.push_back(channels); auto resizeTy = resultTy.clone(resizeShape); - auto resize = builder.create<tosa::ResizeOp>(resizeTy, input, op.getScale(), - op.getOffset(), op.getBorder(), - op.getMode()); + auto resize = + tosa::ResizeOp::create(builder, resizeTy, input, op.getScale(), + op.getOffset(), op.getBorder(), op.getMode()); // Collapse an unit result dims. SmallVector<ReassociationExprs, 4> reassociationMap(2); @@ -1720,20 +1722,20 @@ public: collapseShape.push_back(channels); auto collapseTy = resultTy.clone(collapseShape); - Value collapse = builder.create<tensor::CollapseShapeOp>(collapseTy, resize, - reassociationMap); + Value collapse = tensor::CollapseShapeOp::create(builder, collapseTy, + resize, reassociationMap); // Broadcast the collapsed shape to the output result. 
     llvm::SmallVector<Value> outputDynSize;
     if (inputTy.isDynamicDim(0))
-      outputDynSize.push_back(builder.create<tensor::DimOp>(input, 0));
+      outputDynSize.push_back(tensor::DimOp::create(builder, input, 0));
     if (inputTy.isDynamicDim(3))
-      outputDynSize.push_back(builder.create<tensor::DimOp>(input, 3));
+      outputDynSize.push_back(tensor::DimOp::create(builder, input, 3));

     SmallVector<utils::IteratorType> iterators(resultTy.getRank(),
                                                utils::IteratorType::parallel);
-    Value empty = builder.create<tensor::EmptyOp>(
-        resultTy.getShape(), resultTy.getElementType(), outputDynSize);
+    Value empty = tensor::EmptyOp::create(
+        builder, resultTy.getShape(), resultTy.getElementType(), outputDynSize);

     SmallVector<AffineExpr, 4> inputExprs{rewriter.getAffineDimExpr(0)};
     if (inputH != 1)
@@ -1751,7 +1753,7 @@ public:
         ArrayRef<AffineMap>{inputMap, outputMap}, iterators,
         [=](OpBuilder &b, Location loc, ValueRange args) {
           Value value = args[0];
-          b.create<linalg::YieldOp>(loc, value);
+          linalg::YieldOp::create(b, loc, value);
         });

     return success();
@@ -1789,10 +1791,10 @@ public:
     SmallVector<AffineMap, 2> affineMaps = {
         rewriter.getMultiDimIdentityMap(resultTy.getRank())};

-    auto emptyTensor = b.create<tensor::EmptyOp>(resultTy.getShape(), resultETy,
-                                                 *dynamicDimsOr);
-    auto genericOp = b.create<linalg::GenericOp>(
-        resultTy, ValueRange({}), ValueRange{emptyTensor}, affineMaps,
+    auto emptyTensor = tensor::EmptyOp::create(b, resultTy.getShape(),
+                                               resultETy, *dynamicDimsOr);
+    auto genericOp = linalg::GenericOp::create(
+        b, resultTy, ValueRange({}), ValueRange{emptyTensor}, affineMaps,
         getNParallelLoopsAttrs(resultTy.getRank()));
     Value resize = genericOp.getResult(0);

@@ -1800,19 +1802,21 @@ public:
       OpBuilder::InsertionGuard regionGuard(b);
       b.createBlock(&genericOp.getRegion(), genericOp.getRegion().end(),
                     TypeRange({resultETy}), loc);
-      Value batch = b.create<linalg::IndexOp>(0);
-      Value y = b.create<linalg::IndexOp>(1);
-      Value x = b.create<linalg::IndexOp>(2);
-      Value channel = b.create<linalg::IndexOp>(3);
+      Value batch = linalg::IndexOp::create(b, 0);
+      Value y = linalg::IndexOp::create(b, 1);
+      Value x = linalg::IndexOp::create(b, 2);
+      Value channel = linalg::IndexOp::create(b, 3);

       Value zeroI32 =
-          b.create<arith::ConstantOp>(b.getZeroAttr(b.getI32Type()));
-      Value zeroFp = b.create<arith::ConstantOp>(b.getZeroAttr(floatTy));
-      Value hMax = b.create<arith::ConstantOp>(b.getI32IntegerAttr(imageH - 1));
-      Value wMax = b.create<arith::ConstantOp>(b.getI32IntegerAttr(imageW - 1));
+          arith::ConstantOp::create(b, b.getZeroAttr(b.getI32Type()));
+      Value zeroFp = arith::ConstantOp::create(b, b.getZeroAttr(floatTy));
+      Value hMax =
+          arith::ConstantOp::create(b, b.getI32IntegerAttr(imageH - 1));
+      Value wMax =
+          arith::ConstantOp::create(b, b.getI32IntegerAttr(imageW - 1));

-      Value inY = b.create<arith::IndexCastOp>(b.getI32Type(), y);
-      Value inX = b.create<arith::IndexCastOp>(b.getI32Type(), x);
+      Value inY = arith::IndexCastOp::create(b, b.getI32Type(), y);
+      Value inX = arith::IndexCastOp::create(b, b.getI32Type(), x);

       SmallVector<int64_t> scale, offset, border;
       if (!tosa::getConstShapeValues(op.getScale().getDefiningOp(), scale) ||
@@ -1824,16 +1828,16 @@ public:
       }

       Value yScaleN, yScaleD, xScaleN, xScaleD;
-      yScaleN = b.create<arith::ConstantOp>(b.getI32IntegerAttr(scale[0]));
-      yScaleD = b.create<arith::ConstantOp>(b.getI32IntegerAttr(scale[1]));
-      xScaleN = b.create<arith::ConstantOp>(b.getI32IntegerAttr(scale[2]));
-      xScaleD = b.create<arith::ConstantOp>(b.getI32IntegerAttr(scale[3]));
+      yScaleN = arith::ConstantOp::create(b, b.getI32IntegerAttr(scale[0]));
+      yScaleD = arith::ConstantOp::create(b, b.getI32IntegerAttr(scale[1]));
+      xScaleN = arith::ConstantOp::create(b, b.getI32IntegerAttr(scale[2]));
+      xScaleD = arith::ConstantOp::create(b, b.getI32IntegerAttr(scale[3]));

       Value yOffset, xOffset, yBorder, xBorder;
-      yOffset = b.create<arith::ConstantOp>(b.getI32IntegerAttr(offset[0]));
-      xOffset = b.create<arith::ConstantOp>(b.getI32IntegerAttr(offset[1]));
-      yBorder = b.create<arith::ConstantOp>(b.getI32IntegerAttr(border[0]));
-      xBorder = b.create<arith::ConstantOp>(b.getI32IntegerAttr(border[1]));
+      yOffset = arith::ConstantOp::create(b, b.getI32IntegerAttr(offset[0]));
+      xOffset = arith::ConstantOp::create(b, b.getI32IntegerAttr(offset[1]));
+      yBorder = arith::ConstantOp::create(b, b.getI32IntegerAttr(border[0]));
+      xBorder = arith::ConstantOp::create(b, b.getI32IntegerAttr(border[1]));

       // Compute the ix and dx values for both the X and Y dimensions.
       auto getIndexAndDeltaFp = [&](Value &index, Value &delta, Value in,
@@ -1846,16 +1850,16 @@ public:
         }
         // x = x * scale_d + offset;
         // ix = floor(x / scale_n)
-        Value val = b.create<arith::MulIOp>(in, scaleD);
-        val = b.create<arith::AddIOp>(val, offset);
-        index = b.create<arith::FloorDivSIOp>(val, scaleN);
+        Value val = arith::MulIOp::create(b, in, scaleD);
+        val = arith::AddIOp::create(b, val, offset);
+        index = arith::FloorDivSIOp::create(b, val, scaleN);

         // rx = x % scale_n
         // dx = rx / scale_n
-        Value r = b.create<arith::RemSIOp>(val, scaleN);
-        Value rFp = b.create<arith::SIToFPOp>(floatTy, r);
-        Value scaleNfp = b.create<arith::UIToFPOp>(floatTy, scaleN);
-        delta = b.create<arith::DivFOp>(rFp, scaleNfp);
+        Value r = arith::RemSIOp::create(b, val, scaleN);
+        Value rFp = arith::SIToFPOp::create(b, floatTy, r);
+        Value scaleNfp = arith::UIToFPOp::create(b, floatTy, scaleN);
+        delta = arith::DivFOp::create(b, rFp, scaleNfp);
       };

       // Compute the ix and dx values for the X and Y dimensions - int case.
@@ -1870,11 +1874,11 @@ public:
         // x = x * scale_d + offset;
         // ix = floor(x / scale_n)
        // dx = x - ix * scale_n;
-        Value val = b.create<arith::MulIOp>(in, scaleD);
-        val = b.create<arith::AddIOp>(val, offset);
-        index = b.create<arith::DivSIOp>(val, scaleN);
-        delta = b.create<arith::MulIOp>(index, scaleN);
-        delta = b.create<arith::SubIOp>(val, delta);
+        Value val = arith::MulIOp::create(b, in, scaleD);
+        val = arith::AddIOp::create(b, val, offset);
+        index = arith::DivSIOp::create(b, val, scaleN);
+        delta = arith::MulIOp::create(b, index, scaleN);
+        delta = arith::SubIOp::create(b, val, delta);
       };

       Value ix, iy, dx, dy;
@@ -1887,54 +1891,55 @@ public:
       }

       if (op.getMode() == "NEAREST_NEIGHBOR") {
-        auto one = b.create<arith::ConstantOp>(b.getI32IntegerAttr(1));
+        auto one = arith::ConstantOp::create(b, b.getI32IntegerAttr(1));

         auto getNearestIndexAndClamp = [&](Value val, Value dval, Value scale,
                                            Value max, int size,
                                            ImplicitLocOpBuilder &b) -> Value {
           if (size == 1) {
-            return b.create<arith::ConstantIndexOp>(0);
+            return arith::ConstantIndexOp::create(b, 0);
           }

           Value pred;
           if (floatingPointMode) {
-            auto h = b.create<arith::ConstantOp>(b.getFloatAttr(floatTy, 0.5f));
-            pred = b.create<arith::CmpFOp>(arith::CmpFPredicate::OGE, dval, h);
+            auto h =
+                arith::ConstantOp::create(b, b.getFloatAttr(floatTy, 0.5f));
+            pred = arith::CmpFOp::create(b, arith::CmpFPredicate::OGE, dval, h);
           } else {
-            Value dvalDouble = b.create<arith::ShLIOp>(dval, one);
-            pred = b.create<arith::CmpIOp>(arith::CmpIPredicate::sge,
-                                           dvalDouble, scale);
+            Value dvalDouble = arith::ShLIOp::create(b, dval, one);
+            pred = arith::CmpIOp::create(b, arith::CmpIPredicate::sge,
+                                         dvalDouble, scale);
           }

-          auto offset = b.create<arith::SelectOp>(pred, one, zeroI32);
-          val = b.create<arith::AddIOp>(val, offset);
+          auto offset = arith::SelectOp::create(b, pred, one, zeroI32);
+          val = arith::AddIOp::create(b, val, offset);
           val = clampIntHelper(loc, val, zeroI32, max, b, /*isUnsigned=*/false);
-          return b.create<arith::IndexCastOp>(b.getIndexType(), val);
+          return arith::IndexCastOp::create(b, b.getIndexType(), val);
         };

         iy = getNearestIndexAndClamp(iy, dy, yScaleN, hMax, imageH, b);
         ix = getNearestIndexAndClamp(ix, dx, xScaleN, wMax, imageW, b);

-        Value result = b.create<tensor::ExtractOp>(
-            input, ValueRange{batch, iy, ix, channel});
+        Value result = tensor::ExtractOp::create(
+            b, input, ValueRange{batch, iy, ix, channel});

-        b.create<linalg::YieldOp>(result);
+        linalg::YieldOp::create(b, result);
       } else {
         // The mode here must be BILINEAR.
         assert(op.getMode() == "BILINEAR");

-        auto oneVal = b.create<arith::ConstantOp>(b.getI32IntegerAttr(1));
+        auto oneVal = arith::ConstantOp::create(b, b.getI32IntegerAttr(1));

         auto getClampedIdxs = [&](Value &val0, Value &val1, int size, Value in,
                                   Value max, ImplicitLocOpBuilder &b) {
           val0 = in;
-          val1 = b.create<arith::AddIOp>(val0, oneVal);
+          val1 = arith::AddIOp::create(b, val0, oneVal);
           val0 =
               clampIntHelper(loc, val0, zeroI32, max, b, /*isUnsigned=*/false);
           val1 =
               clampIntHelper(loc, val1, zeroI32, max, b, /*isUnsigned=*/false);
-          val0 = b.create<arith::IndexCastOp>(b.getIndexType(), val0);
-          val1 = b.create<arith::IndexCastOp>(b.getIndexType(), val1);
+          val0 = arith::IndexCastOp::create(b, b.getIndexType(), val0);
+          val1 = arith::IndexCastOp::create(b, b.getIndexType(), val1);
         };

         // Linalg equivalent to the section below:
@@ -1946,27 +1951,27 @@ public:
         getClampedIdxs(y0, y1, imageH, iy, hMax, b);
         getClampedIdxs(x0, x1, imageW, ix, wMax, b);

-        Value y0x0 = b.create<tensor::ExtractOp>(
-            input, ValueRange{batch, y0, x0, channel});
-        Value y0x1 = b.create<tensor::ExtractOp>(
-            input, ValueRange{batch, y0, x1, channel});
-        Value y1x0 = b.create<tensor::ExtractOp>(
-            input, ValueRange{batch, y1, x0, channel});
-        Value y1x1 = b.create<tensor::ExtractOp>(
-            input, ValueRange{batch, y1, x1, channel});
+        Value y0x0 = tensor::ExtractOp::create(
+            b, input, ValueRange{batch, y0, x0, channel});
+        Value y0x1 = tensor::ExtractOp::create(
+            b, input, ValueRange{batch, y0, x1, channel});
+        Value y1x0 = tensor::ExtractOp::create(
+            b, input, ValueRange{batch, y1, x0, channel});
+        Value y1x1 = tensor::ExtractOp::create(
+            b, input, ValueRange{batch, y1, x1, channel});

         if (floatingPointMode) {
           auto oneVal =
-              b.create<arith::ConstantOp>(b.getFloatAttr(floatTy, 1.0f));
+              arith::ConstantOp::create(b, b.getFloatAttr(floatTy, 1.0f));
           auto interpolate = [&](Value val0, Value val1, Value delta,
                                  int inputSize,
                                  ImplicitLocOpBuilder &b) -> Value {
             if (inputSize == 1)
               return val0;
-            Value oneMinusDelta = b.create<arith::SubFOp>(oneVal, delta);
-            Value mul0 = b.create<arith::MulFOp>(val0, oneMinusDelta);
-            Value mul1 = b.create<arith::MulFOp>(val1, delta);
-            return b.create<arith::AddFOp>(mul0, mul1);
+            Value oneMinusDelta = arith::SubFOp::create(b, oneVal, delta);
+            Value mul0 = arith::MulFOp::create(b, val0, oneMinusDelta);
+            Value mul1 = arith::MulFOp::create(b, val1, delta);
+            return arith::AddFOp::create(b, mul0, mul1);
           };

           // Linalg equivalent to the section below:
@@ -1982,18 +1987,18 @@ public:
           // Linalg equivalent to the section below:
           // result = topAcc * (unit_y - dy) + bottomAcc * dy
           Value result = interpolate(topAcc, bottomAcc, dy, imageH, b);
-          b.create<linalg::YieldOp>(result);
+          linalg::YieldOp::create(b, result);
         } else {
           // Perform in quantized space.
-          y0x0 = b.create<arith::ExtSIOp>(resultETy, y0x0);
-          y0x1 = b.create<arith::ExtSIOp>(resultETy, y0x1);
-          y1x0 = b.create<arith::ExtSIOp>(resultETy, y1x0);
-          y1x1 = b.create<arith::ExtSIOp>(resultETy, y1x1);
+          y0x0 = arith::ExtSIOp::create(b, resultETy, y0x0);
+          y0x1 = arith::ExtSIOp::create(b, resultETy, y0x1);
+          y1x0 = arith::ExtSIOp::create(b, resultETy, y1x0);
+          y1x1 = arith::ExtSIOp::create(b, resultETy, y1x1);

           const int64_t deltaBitwidth = dx.getType().getIntOrFloatBitWidth();
           if (resultETy.getIntOrFloatBitWidth() > deltaBitwidth) {
-            dx = b.create<arith::ExtSIOp>(resultETy, dx);
-            dy = b.create<arith::ExtSIOp>(resultETy, dy);
+            dx = arith::ExtSIOp::create(b, resultETy, dx);
+            dy = arith::ExtSIOp::create(b, resultETy, dy);
           }

           Value yScaleNExt = yScaleN;
@@ -2002,26 +2007,26 @@ public:
           const int64_t scaleBitwidth =
               xScaleN.getType().getIntOrFloatBitWidth();
           if (resultETy.getIntOrFloatBitWidth() > scaleBitwidth) {
-            yScaleNExt = b.create<arith::ExtSIOp>(resultETy, yScaleN);
-            xScaleNExt = b.create<arith::ExtSIOp>(resultETy, xScaleN);
+            yScaleNExt = arith::ExtSIOp::create(b, resultETy, yScaleN);
+            xScaleNExt = arith::ExtSIOp::create(b, resultETy, xScaleN);
           }

           auto interpolate = [](Value val0, Value val1, Value weight1,
                                 Value scale, int inputSize,
                                 ImplicitLocOpBuilder &b) -> Value {
             if (inputSize == 1)
-              return b.create<arith::MulIOp>(val0, scale);
-            Value weight0 = b.create<arith::SubIOp>(scale, weight1);
-            Value mul0 = b.create<arith::MulIOp>(val0, weight0);
-            Value mul1 = b.create<arith::MulIOp>(val1, weight1);
-            return b.create<arith::AddIOp>(mul0, mul1);
+              return arith::MulIOp::create(b, val0, scale);
+            Value weight0 = arith::SubIOp::create(b, scale, weight1);
+            Value mul0 = arith::MulIOp::create(b, val0, weight0);
+            Value mul1 = arith::MulIOp::create(b, val1, weight1);
+            return arith::AddIOp::create(b, mul0, mul1);
           };

           Value topAcc = interpolate(y0x0, y0x1, dx, xScaleNExt, imageW, b);
           Value bottomAcc = interpolate(y1x0, y1x1, dx, xScaleNExt, imageW, b);
           Value result =
               interpolate(topAcc, bottomAcc, dy, yScaleNExt, imageH, b);
-          b.create<linalg::YieldOp>(result);
+          linalg::YieldOp::create(b, result);
         }
       }
     }
@@ -2072,17 +2077,16 @@ public:
     SmallVector<Value> dynDims;
     for (int i = 0; i < inputTy.getRank(); i++) {
       if (inputTy.isDynamicDim(i)) {
-        dynDims.push_back(rewriter.create<tensor::DimOp>(loc, input, i));
+        dynDims.push_back(tensor::DimOp::create(rewriter, loc, input, i));
       }
     }

-    Value axisDimSize = rewriter.create<tensor::DimOp>(loc, input, axis);
+    Value axisDimSize = tensor::DimOp::create(rewriter, loc, input, axis);

     // First fill the output buffer with the init value.
-    auto emptyTensor = rewriter
-                           .create<tensor::EmptyOp>(loc, inputTy.getShape(),
-                                                    inputTy.getElementType(),
-                                                    ArrayRef<Value>({dynDims}))
+    auto emptyTensor = tensor::EmptyOp::create(
+        rewriter, loc, inputTy.getShape(),
+        inputTy.getElementType(), ArrayRef<Value>({dynDims}))
                            .getResult();
     SmallVector<AffineMap, 2> affineMaps = {
         rewriter.getMultiDimIdentityMap(resultTy.getRank())};
@@ -2094,22 +2098,22 @@ public:
           llvm::SmallVector<Value> indices;
           for (unsigned int i = 0; i < inputTy.getRank(); i++) {
             Value index =
-                rewriter.create<linalg::IndexOp>(nestedLoc, i).getResult();
+                linalg::IndexOp::create(rewriter, nestedLoc, i).getResult();
             if (i == axis) {
-              auto one = rewriter.create<arith::ConstantIndexOp>(nestedLoc, 1);
+              auto one = arith::ConstantIndexOp::create(rewriter, nestedLoc, 1);
               auto sizeMinusOne =
-                  rewriter.create<arith::SubIOp>(nestedLoc, axisDimSize, one);
-              index = rewriter.create<arith::SubIOp>(nestedLoc, sizeMinusOne,
-                                                     index);
+                  arith::SubIOp::create(rewriter, nestedLoc, axisDimSize, one);
+              index = arith::SubIOp::create(rewriter, nestedLoc, sizeMinusOne,
+                                            index);
             }

             indices.push_back(index);
           }

-          auto extract = nestedBuilder.create<tensor::ExtractOp>(
-              nestedLoc, input, indices);
-          nestedBuilder.create<linalg::YieldOp>(op.getLoc(),
-                                                extract.getResult());
+          auto extract = tensor::ExtractOp::create(nestedBuilder, nestedLoc,
+                                                   input, indices);
+          linalg::YieldOp::create(nestedBuilder, op.getLoc(),
+                                  extract.getResult());
         });
     return success();
   }
@@ -2148,12 +2152,12 @@ struct TileConverter : public OpConversionPattern<tosa::TileOp> {
     SmallVector<Value> dynDims;
     for (int i = 0; i < inputTy.getRank(); i++) {
       if (inputTy.isDynamicDim(i) || multiples[i] == -1) {
-        dynDims.push_back(rewriter.create<tensor::DimOp>(loc, input, i));
+        dynDims.push_back(tensor::DimOp::create(rewriter, loc, input, i));
      }
     }

-    auto emptyTensor = rewriter.create<tensor::EmptyOp>(
-        op.getLoc(), genericShape, elementTy, dynDims);
+    auto emptyTensor = tensor::EmptyOp::create(
+        rewriter, op.getLoc(), genericShape, elementTy, dynDims);

     // We needs to map the input shape to the non-broadcasted dimensions.
     SmallVector<AffineExpr, 4> dimExprs;
@@ -2168,12 +2172,12 @@ struct TileConverter : public OpConversionPattern<tosa::TileOp> {
     SmallVector<AffineMap, 2> affineMaps = {
         readAffineMap, rewriter.getMultiDimIdentityMap(genericShape.size())};

-    auto genericOp = rewriter.create<linalg::GenericOp>(
-        loc, RankedTensorType::get(genericShape, elementTy), input,
+    auto genericOp = linalg::GenericOp::create(
+        rewriter, loc, RankedTensorType::get(genericShape, elementTy), input,
         ValueRange{emptyTensor}, affineMaps,
         getNParallelLoopsAttrs(genericShape.size()),
         [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) {
-          nestedBuilder.create<linalg::YieldOp>(op.getLoc(), *args.begin());
+          linalg::YieldOp::create(nestedBuilder, op.getLoc(), *args.begin());
         });

     auto shapeValue = getTosaConstShape(
@@ -2220,28 +2224,27 @@ public:
     SmallVector<Value> dynDims;
     for (int i = 0; i < inputTy.getRank(); i++) {
       if (inputTy.isDynamicDim(i) && i != axis) {
-        dynDims.push_back(rewriter.create<tensor::DimOp>(loc, input, i));
+        dynDims.push_back(tensor::DimOp::create(rewriter, loc, input, i));
       }
     }

     // First fill the output buffer for the index.
-    auto emptyTensorIdx = rewriter
-                              .create<tensor::EmptyOp>(loc, resultTy.getShape(),
-                                                       outElementTy, dynDims)
-                              .getResult();
-    auto fillValueIdx = rewriter.create<arith::ConstantOp>(
-        loc, rewriter.getIntegerAttr(outElementTy, 0));
+    auto emptyTensorIdx =
+        tensor::EmptyOp::create(rewriter, loc, resultTy.getShape(),
+                                outElementTy, dynDims)
+            .getResult();
+    auto fillValueIdx = arith::ConstantOp::create(
+        rewriter, loc, rewriter.getIntegerAttr(outElementTy, 0));
     auto filledTensorIdx =
-        rewriter
-            .create<linalg::FillOp>(loc, ValueRange{fillValueIdx},
-                                    ValueRange{emptyTensorIdx})
+        linalg::FillOp::create(rewriter, loc, ValueRange{fillValueIdx},
+                               ValueRange{emptyTensorIdx})
            .result();

     // Second fill the output buffer for the running max.
-    auto emptyTensorMax = rewriter
-                              .create<tensor::EmptyOp>(loc, resultTy.getShape(),
-                                                       inElementTy, dynDims)
-                              .getResult();
+    auto emptyTensorMax =
+        tensor::EmptyOp::create(rewriter, loc, resultTy.getShape(), inElementTy,
+                                dynDims)
+            .getResult();
     auto fillValueMaxAttr =
         createInitialValueForReduceOp(argmaxOp, inElementTy, rewriter);

@@ -2250,11 +2253,10 @@ public:
           argmaxOp, "unsupported tosa.argmax element type");

     auto fillValueMax =
-        rewriter.create<arith::ConstantOp>(loc, fillValueMaxAttr);
+        arith::ConstantOp::create(rewriter, loc, fillValueMaxAttr);
     auto filledTensorMax =
-        rewriter
-            .create<linalg::FillOp>(loc, ValueRange{fillValueMax},
-                                    ValueRange{emptyTensorMax})
+        linalg::FillOp::create(rewriter, loc, ValueRange{fillValueMax},
+                               ValueRange{emptyTensorMax})
            .result();

     // We need to reduce along the arg-max axis, with parallel operations along
@@ -2274,8 +2276,8 @@ public:
     bool didEncounterError = false;
     auto maps = AffineMap::inferFromExprList({srcExprs, dstExprs, dstExprs},
                                              rewriter.getContext());
-    auto linalgOp = rewriter.create<linalg::GenericOp>(
-        loc, ArrayRef<Type>({resultTy, resultMaxTy}), input,
+    auto linalgOp = linalg::GenericOp::create(
+        rewriter, loc, ArrayRef<Type>({resultTy, resultMaxTy}), input,
         ValueRange({filledTensorIdx, filledTensorMax}), maps, iteratorTypes,
         [&](OpBuilder &nestedBuilder, Location nestedLoc,
             ValueRange blockArgs) {
          auto newValue = blockArgs[0];
          auto oldIndex = blockArgs[1];
          auto oldValue = blockArgs[2];

-          Value newIndex = rewriter.create<arith::IndexCastOp>(
-              nestedLoc, oldIndex.getType(),
-              rewriter.create<linalg::IndexOp>(loc, axis));
+          Value newIndex = arith::IndexCastOp::create(
+              rewriter, nestedLoc, oldIndex.getType(),
+              linalg::IndexOp::create(rewriter, loc, axis));

           Value predicate;
           if (isa<FloatType>(inElementTy)) {
             if (argmaxOp.getNanMode() == "IGNORE") {
               // Only update index & max value for non NaN values. If all
               // values are NaNs, the initial index will be return which is 0.
-              predicate = rewriter.create<arith::CmpFOp>(
-                  nestedLoc, arith::CmpFPredicate::OGT, newValue, oldValue);
+              predicate = arith::CmpFOp::create(rewriter, nestedLoc,
+                                                arith::CmpFPredicate::OGT,
+                                                newValue, oldValue);
             } else {
               // Update max value if either of the following is true:
               // - new value is bigger
               // - cur max is not NaN and new value is NaN
-              Value gt = rewriter.create<arith::CmpFOp>(
-                  nestedLoc, arith::CmpFPredicate::UGT, newValue, oldValue);
-              Value oldNonNaN = rewriter.create<arith::CmpFOp>(
-                  nestedLoc, arith::CmpFPredicate::ORD, oldValue, oldValue);
-              predicate = rewriter.create<arith::AndIOp>(
-                  nestedLoc, rewriter.getI1Type(), gt, oldNonNaN);
+              Value gt = arith::CmpFOp::create(rewriter, nestedLoc,
+                                               arith::CmpFPredicate::UGT,
+                                               newValue, oldValue);
+              Value oldNonNaN = arith::CmpFOp::create(rewriter, nestedLoc,
+                                                      arith::CmpFPredicate::ORD,
+                                                      oldValue, oldValue);
+              predicate = arith::AndIOp::create(
+                  rewriter, nestedLoc, rewriter.getI1Type(), gt, oldNonNaN);
             }
           } else if (isa<IntegerType>(inElementTy)) {
-            predicate = rewriter.create<arith::CmpIOp>(
-                nestedLoc, arith::CmpIPredicate::sgt, newValue, oldValue);
+            predicate = arith::CmpIOp::create(rewriter, nestedLoc,
+                                              arith::CmpIPredicate::sgt,
+                                              newValue, oldValue);
           } else {
             didEncounterError = true;
             return;
           }

-          auto resultMax = rewriter.create<arith::SelectOp>(
-              nestedLoc, predicate, newValue, oldValue);
-          auto resultIndex = rewriter.create<arith::SelectOp>(
-              nestedLoc, predicate, newIndex, oldIndex);
-          nestedBuilder.create<linalg::YieldOp>(
-              nestedLoc, ValueRange({resultIndex, resultMax}));
+          auto resultMax = arith::SelectOp::create(
+              rewriter, nestedLoc, predicate, newValue, oldValue);
+          auto resultIndex = arith::SelectOp::create(
+              rewriter, nestedLoc, predicate, newIndex, oldIndex);
+          linalg::YieldOp::create(nestedBuilder, nestedLoc,
+                                  ValueRange({resultIndex, resultMax}));
         });

     if (didEncounterError)
@@ -2351,9 +2357,8 @@ public:
     auto loc = op.getLoc();

     auto emptyTensor =
-        rewriter
-            .create<tensor::EmptyOp>(loc, resultTy.getShape(), resultElementTy,
-                                     dynamicDims)
+        tensor::EmptyOp::create(rewriter, loc, resultTy.getShape(),
+                                resultElementTy, dynamicDims)
            .getResult();

     SmallVector<AffineMap, 2> affineMaps = {
@@ -2363,19 +2368,19 @@ public:
             rewriter.getContext()),
         rewriter.getMultiDimIdentityMap(resultTy.getRank())};

-    auto genericOp = rewriter.create<linalg::GenericOp>(
-        loc, ArrayRef<Type>({resultTy}), ValueRange{indices},
+    auto genericOp = linalg::GenericOp::create(
+        rewriter, loc, ArrayRef<Type>({resultTy}), ValueRange{indices},
         ValueRange{emptyTensor}, affineMaps,
         getNParallelLoopsAttrs(resultTy.getRank()),
         [&](OpBuilder &b, Location loc, ValueRange args) {
           auto indexValue = args[0];
-          auto index0 = rewriter.create<linalg::IndexOp>(loc, 0);
-          Value index1 = rewriter.create<arith::IndexCastOp>(
-              loc, rewriter.getIndexType(), indexValue);
-          auto index2 = rewriter.create<linalg::IndexOp>(loc, 2);
-          Value extract = rewriter.create<tensor::ExtractOp>(
-              loc, input, ValueRange{index0, index1, index2});
-          rewriter.create<linalg::YieldOp>(loc, extract);
+          auto index0 = linalg::IndexOp::create(rewriter, loc, 0);
+          Value index1 = arith::IndexCastOp::create(
+              rewriter, loc, rewriter.getIndexType(), indexValue);
+          auto index2 = linalg::IndexOp::create(rewriter, loc, 2);
+          Value extract = tensor::ExtractOp::create(
+              rewriter, loc, input, ValueRange{index0, index1, index2});
+          linalg::YieldOp::create(rewriter, loc, extract);
         });
     rewriter.replaceOp(op, genericOp.getResult(0));
     return success();
@@ -2424,22 +2429,22 @@ public:
     for (int i = 0; i < resultTy.getRank(); ++i) {
       if (inputTy.isDynamicDim(i)) {
         dynDims.push_back(
-            rewriter.create<tensor::DimOp>(loc, op.getOperand(0), i));
+            tensor::DimOp::create(rewriter, loc, op.getOperand(0), i));
       }
     }

-    auto emptyTensor = rewriter
-                           .create<tensor::EmptyOp>(loc, resultTy.getShape(),
-                                                    resultElementTy, dynDims)
-                           .getResult();
+    auto emptyTensor =
+        tensor::EmptyOp::create(rewriter, loc, resultTy.getShape(),
+                                resultElementTy, dynDims)
+            .getResult();

     SmallVector<AffineMap, 2> affineMaps = {
         rewriter.getMultiDimIdentityMap(resultTy.getRank()),
         rewriter.getMultiDimIdentityMap(resultTy.getRank())};

-    auto genericOp = rewriter.create<linalg::GenericOp>(
-        loc, resultTy, ValueRange({input}), ValueRange{emptyTensor}, affineMaps,
-        getNParallelLoopsAttrs(resultTy.getRank()));
+    auto genericOp = linalg::GenericOp::create(
+        rewriter, loc, resultTy, ValueRange({input}), ValueRange{emptyTensor},
+        affineMaps, getNParallelLoopsAttrs(resultTy.getRank()));
     rewriter.replaceOp(op, genericOp.getResult(0));

     {
@@ -2452,69 +2457,69 @@ public:
       rewriter.setInsertionPointToStart(block);
       if (inputElementTy.isInteger(8) && tableElementTy.isInteger(8) &&
          resultElementTy.isInteger(8)) {
-        Value index = rewriter.create<arith::IndexCastOp>(
-            loc, rewriter.getIndexType(), inputValue);
-        Value offset = rewriter.create<arith::ConstantIndexOp>(loc, 128);
-        index = rewriter.create<arith::AddIOp>(loc, rewriter.getIndexType(),
-                                               index, offset);
+        Value index = arith::IndexCastOp::create(
+            rewriter, loc, rewriter.getIndexType(), inputValue);
+        Value offset = arith::ConstantIndexOp::create(rewriter, loc, 128);
+        index = arith::AddIOp::create(rewriter, loc, rewriter.getIndexType(),
+                                      index, offset);
         Value extract =
-            rewriter.create<tensor::ExtractOp>(loc, table, ValueRange{index});
-        rewriter.create<linalg::YieldOp>(loc, extract);
+            tensor::ExtractOp::create(rewriter, loc, table, ValueRange{index});
+        linalg::YieldOp::create(rewriter, loc, extract);
         return success();
       }

      if (inputElementTy.isInteger(16) && tableElementTy.isInteger(16) &&
          resultElementTy.isInteger(32)) {
-        Value extend = rewriter.create<arith::ExtSIOp>(
-            loc, rewriter.getI32Type(), inputValue);
-
-        auto offset = rewriter.create<arith::ConstantOp>(
-            loc, rewriter.getI32IntegerAttr(32768));
-        auto seven = rewriter.create<arith::ConstantOp>(
-            loc, rewriter.getI32IntegerAttr(7));
-        auto one = rewriter.create<arith::ConstantOp>(
-            loc, rewriter.getI32IntegerAttr(1));
-        auto b1111111 = rewriter.create<arith::ConstantOp>(
-            loc, rewriter.getI32IntegerAttr(127));
+        Value extend = arith::ExtSIOp::create(
+            rewriter, loc, rewriter.getI32Type(), inputValue);
+
+        auto offset = arith::ConstantOp::create(
+            rewriter, loc, rewriter.getI32IntegerAttr(32768));
+        auto seven = arith::ConstantOp::create(rewriter, loc,
+                                               rewriter.getI32IntegerAttr(7));
+        auto one = arith::ConstantOp::create(rewriter, loc,
+                                             rewriter.getI32IntegerAttr(1));
+        auto b1111111 = arith::ConstantOp::create(
+            rewriter, loc, rewriter.getI32IntegerAttr(127));

         // Compute the index and fractional part from the input value:
         // value = value + 32768
         // index = value >> 7;
         // fraction = 0x01111111 & value
-        auto extendAdd = rewriter.create<arith::AddIOp>(loc, extend, offset);
-        Value index = rewriter.create<arith::ShRUIOp>(loc, extendAdd, seven);
+        auto extendAdd = arith::AddIOp::create(rewriter, loc, extend, offset);
+        Value index = arith::ShRUIOp::create(rewriter, loc, extendAdd, seven);
         Value fraction =
-            rewriter.create<arith::AndIOp>(loc, extendAdd, b1111111);
+            arith::AndIOp::create(rewriter, loc, extendAdd, b1111111);

         // Extract the base and next values from the table.
         // base = (int32_t) table[index];
         // next = (int32_t) table[index + 1];
-        Value indexPlusOne = rewriter.create<arith::AddIOp>(loc, index, one);
+        Value indexPlusOne = arith::AddIOp::create(rewriter, loc, index, one);

-        index = rewriter.create<arith::IndexCastOp>(
-            loc, rewriter.getIndexType(), index);
-        indexPlusOne = rewriter.create<arith::IndexCastOp>(
-            loc, rewriter.getIndexType(), indexPlusOne);
+        index = arith::IndexCastOp::create(rewriter, loc,
+                                           rewriter.getIndexType(), index);
+        indexPlusOne = arith::IndexCastOp::create(
+            rewriter, loc, rewriter.getIndexType(), indexPlusOne);

         Value base =
-            rewriter.create<tensor::ExtractOp>(loc, table, ValueRange{index});
-        Value next = rewriter.create<tensor::ExtractOp>(
-            loc, table, ValueRange{indexPlusOne});
+            tensor::ExtractOp::create(rewriter, loc, table, ValueRange{index});
+        Value next = tensor::ExtractOp::create(rewriter, loc, table,
+                                               ValueRange{indexPlusOne});

         base =
-            rewriter.create<arith::ExtSIOp>(loc, rewriter.getI32Type(), base);
+            arith::ExtSIOp::create(rewriter, loc, rewriter.getI32Type(), base);
         next =
-            rewriter.create<arith::ExtSIOp>(loc, rewriter.getI32Type(), next);
+            arith::ExtSIOp::create(rewriter, loc, rewriter.getI32Type(), next);

         // Use the fractional part to interpolate between the input values:
         // result = (base << 7) + (next - base) * fraction
-        Value baseScaled = rewriter.create<arith::ShLIOp>(loc, base, seven);
-        Value diff = rewriter.create<arith::SubIOp>(loc, next, base);
-        Value diffScaled = rewriter.create<arith::MulIOp>(loc, diff, fraction);
+        Value baseScaled = arith::ShLIOp::create(rewriter, loc, base, seven);
+        Value diff = arith::SubIOp::create(rewriter, loc, next, base);
+        Value diffScaled = arith::MulIOp::create(rewriter, loc, diff, fraction);
         Value result =
-            rewriter.create<arith::AddIOp>(loc, baseScaled, diffScaled);
+            arith::AddIOp::create(rewriter, loc, baseScaled, diffScaled);

-        rewriter.create<linalg::YieldOp>(loc, result);
+        linalg::YieldOp::create(rewriter, loc, result);
         return success();
       }
@@ -2532,8 +2537,8 @@ struct RFFT2dConverter final : public OpRewritePattern<RFFT2dOp> {

   static OpFoldResult halfPlusOne(OpBuilder &builder, Location loc,
                                   OpFoldResult ofr) {
-    auto one = builder.create<arith::ConstantIndexOp>(loc, 1);
-    auto two = builder.create<arith::ConstantIndexOp>(loc, 2);
+    auto one = arith::ConstantIndexOp::create(builder, loc, 1);
+    auto two = arith::ConstantIndexOp::create(builder, loc, 2);

     auto value = getValueOrCreateConstantIndexOp(builder, loc, ofr);
     auto divBy2 = builder.createOrFold<arith::DivUIOp>(loc, value, two);
@@ -2562,30 +2567,30 @@ struct RFFT2dConverter final : public OpRewritePattern<RFFT2dOp> {
                                 RankedTensorType type,
                                 llvm::ArrayRef<Value> dynamicSizes) {
     auto emptyTensor =
-        rewriter.create<tensor::EmptyOp>(loc, type, dynamicSizes);
+        tensor::EmptyOp::create(rewriter, loc, type, dynamicSizes);
     auto fillValueAttr = rewriter.getZeroAttr(type.getElementType());
-    auto fillValue = rewriter.create<arith::ConstantOp>(loc, fillValueAttr);
-    auto filledTensor = rewriter
-                            .create<linalg::FillOp>(loc, ValueRange{fillValue},
-                                                    ValueRange{emptyTensor})
-                            .result();
+    auto fillValue = arith::ConstantOp::create(rewriter, loc, fillValueAttr);
+    auto filledTensor =
+        linalg::FillOp::create(rewriter, loc, ValueRange{fillValue},
+                               ValueRange{emptyTensor})
+            .result();
     return filledTensor;
   }

   static Value castIndexToFloat(OpBuilder &builder, Location loc,
                                 FloatType type, Value value) {
-    auto integerVal = builder.create<arith::IndexCastUIOp>(
-        loc,
+    auto integerVal = arith::IndexCastUIOp::create(
+        builder, loc,
         type.getIntOrFloatBitWidth() > 32 ? builder.getI64Type()
                                           : builder.getI32Type(),
         value);
-    return builder.create<arith::UIToFPOp>(loc, type, integerVal);
+    return arith::UIToFPOp::create(builder, loc, type, integerVal);
   }

   static Value createLinalgIndex(OpBuilder &builder, Location loc,
                                  FloatType type, int64_t index) {
-    auto indexVal = builder.create<linalg::IndexOp>(loc, index);
+    auto indexVal = linalg::IndexOp::create(builder, loc, index);
     return castIndexToFloat(builder, loc, type, indexVal);
   }

@@ -2640,7 +2645,7 @@ struct RFFT2dConverter final : public OpRewritePattern<RFFT2dOp> {

     // Constants and dimension sizes
     auto twoPiAttr = rewriter.getFloatAttr(elementType, 6.283185307179586);
-    auto twoPi = rewriter.create<arith::ConstantOp>(loc, twoPiAttr);
+    auto twoPi = arith::ConstantOp::create(rewriter, loc, twoPiAttr);
     auto constH = castIndexToFloat(rewriter, loc, elementType, dimH);
     auto constW = castIndexToFloat(rewriter, loc, elementType, dimW);

@@ -2650,43 +2655,45 @@ struct RFFT2dConverter final : public OpRewritePattern<RFFT2dOp> {
      Value sumImag = args[2];

       // Indices for angle computation
-      Value oy = builder.create<linalg::IndexOp>(loc, 1);
-      Value ox = builder.create<linalg::IndexOp>(loc, 2);
-      Value iy = builder.create<linalg::IndexOp>(loc, 3);
-      Value ix = builder.create<linalg::IndexOp>(loc, 4);
+      Value oy = linalg::IndexOp::create(builder, loc, 1);
+      Value ox = linalg::IndexOp::create(builder, loc, 2);
+      Value iy = linalg::IndexOp::create(builder, loc, 3);
+      Value ix = linalg::IndexOp::create(builder, loc, 4);

       // Calculating angle without integer parts of components as sin/cos are
       // periodic: angle = 2 * pi() * ( ( (iy * oy) % H) / H + ( (ix * ox) % W )
       // / W);
-      auto iyXoy = builder.create<index::MulOp>(loc, iy, oy);
-      auto ixXox = builder.create<index::MulOp>(loc, ix, ox);
+      auto iyXoy = index::MulOp::create(builder, loc, iy, oy);
+      auto ixXox = index::MulOp::create(builder, loc, ix, ox);

-      auto iyRem = builder.create<index::RemUOp>(loc, iyXoy, dimH);
-      auto ixRem = builder.create<index::RemUOp>(loc, ixXox, dimW);
+      auto iyRem = index::RemUOp::create(builder, loc, iyXoy, dimH);
+      auto ixRem = index::RemUOp::create(builder, loc, ixXox, dimW);

       auto iyRemFloat = castIndexToFloat(builder, loc, elementType, iyRem);
       auto ixRemFloat = castIndexToFloat(builder, loc, elementType, ixRem);

-      auto yComponent = builder.create<arith::DivFOp>(loc, iyRemFloat, constH);
-      auto xComponent = builder.create<arith::DivFOp>(loc, ixRemFloat, constW);
-      auto sumXY = builder.create<arith::AddFOp>(loc, yComponent, xComponent);
-      auto angle = builder.create<arith::MulFOp>(loc, twoPi, sumXY);
+      auto yComponent = arith::DivFOp::create(builder, loc, iyRemFloat, constH);
+      auto xComponent = arith::DivFOp::create(builder, loc, ixRemFloat, constW);
+      auto sumXY = arith::AddFOp::create(builder, loc, yComponent, xComponent);
+      auto angle = arith::MulFOp::create(builder, loc, twoPi, sumXY);

       // realComponent = valReal * cos(angle)
       // imagComponent = valReal * sin(angle)
-      auto cosAngle = builder.create<math::CosOp>(loc, angle);
-      auto sinAngle = builder.create<math::SinOp>(loc, angle);
+      auto cosAngle = math::CosOp::create(builder, loc, angle);
+      auto sinAngle = math::SinOp::create(builder, loc, angle);

       auto realComponent =
-          builder.create<arith::MulFOp>(loc, valReal, cosAngle);
+          arith::MulFOp::create(builder, loc, valReal, cosAngle);
       auto imagComponent =
-          builder.create<arith::MulFOp>(loc, valReal, sinAngle);
+          arith::MulFOp::create(builder, loc, valReal, sinAngle);

       // outReal = sumReal + realComponent
       // outImag = sumImag - imagComponent
-      auto outReal = builder.create<arith::AddFOp>(loc, sumReal, realComponent);
-      auto outImag = builder.create<arith::SubFOp>(loc, sumImag, imagComponent);
+      auto outReal =
+          arith::AddFOp::create(builder, loc, sumReal, realComponent);
+      auto outImag =
+          arith::SubFOp::create(builder, loc, sumImag, imagComponent);

-      builder.create<linalg::YieldOp>(loc, ValueRange{outReal, outImag});
+      linalg::YieldOp::create(builder, loc, ValueRange{outReal, outImag});
     };

     rewriter.replaceOpWithNewOp<linalg::GenericOp>(
@@ -2760,7 +2767,7 @@ struct FFT2dConverter final : OpRewritePattern<FFT2dOp> {

     // Constants and dimension sizes
     auto twoPiAttr = rewriter.getFloatAttr(real_el_ty, 6.283185307179586);
-    auto twoPi = rewriter.create<arith::ConstantOp>(loc, twoPiAttr);
+    auto twoPi = arith::ConstantOp::create(rewriter, loc, twoPiAttr);
     Value constH =
         RFFT2dConverter::castIndexToFloat(rewriter, loc, real_el_ty, dimH);
     Value constW =
@@ -2773,57 +2780,59 @@ struct FFT2dConverter final : OpRewritePattern<FFT2dOp> {
      Value sumImag = args[3];

       // Indices for angle computation
-      Value oy = builder.create<linalg::IndexOp>(loc, 1);
-      Value ox = builder.create<linalg::IndexOp>(loc, 2);
-      Value iy = builder.create<linalg::IndexOp>(loc, 3);
-      Value ix = builder.create<linalg::IndexOp>(loc, 4);
+      Value oy = linalg::IndexOp::create(builder, loc, 1);
+      Value ox = linalg::IndexOp::create(builder, loc, 2);
+      Value iy = linalg::IndexOp::create(builder, loc, 3);
+      Value ix = linalg::IndexOp::create(builder, loc, 4);

       // float_t angle = sign_val * 2 * pi() * ( ( (iy * oy) % H) / H + ( (ix *
       // ox) % W ) / W);
-      auto iyXoy = builder.create<index::MulOp>(loc, iy, oy);
-      auto ixXox = builder.create<index::MulOp>(loc, ix, ox);
+      auto iyXoy = index::MulOp::create(builder, loc, iy, oy);
+      auto ixXox = index::MulOp::create(builder, loc, ix, ox);

-      auto iyRem = builder.create<index::RemUOp>(loc, iyXoy, dimH);
-      auto ixRem = builder.create<index::RemUOp>(loc, ixXox, dimW);
+      auto iyRem = index::RemUOp::create(builder, loc, iyXoy, dimH);
+      auto ixRem = index::RemUOp::create(builder, loc, ixXox, dimW);

       auto iyRemFloat =
           RFFT2dConverter::castIndexToFloat(builder, loc, real_el_ty, iyRem);
       auto ixRemFloat =
           RFFT2dConverter::castIndexToFloat(builder, loc, real_el_ty, ixRem);

-      auto yComponent = builder.create<arith::DivFOp>(loc, iyRemFloat, constH);
-      auto xComponent = builder.create<arith::DivFOp>(loc, ixRemFloat, constW);
+      auto yComponent = arith::DivFOp::create(builder, loc, iyRemFloat, constH);
+      auto xComponent = arith::DivFOp::create(builder, loc, ixRemFloat, constW);

-      auto sumXY = builder.create<arith::AddFOp>(loc, yComponent, xComponent);
-      auto angle = builder.create<arith::MulFOp>(loc, twoPi, sumXY);
+      auto sumXY = arith::AddFOp::create(builder, loc, yComponent, xComponent);
+      auto angle = arith::MulFOp::create(builder, loc, twoPi, sumXY);

       if (inverse.getValue()) {
-        angle = builder.create<arith::MulFOp>(
-            loc, angle,
-            rewriter.create<arith::ConstantOp>(
-                loc, rewriter.getFloatAttr(real_el_ty, -1.0)));
+        angle = arith::MulFOp::create(
+            builder, loc, angle,
+            arith::ConstantOp::create(rewriter, loc,
+                                      rewriter.getFloatAttr(real_el_ty, -1.0)));
       }

       // realComponent = val_real * cos(a) + val_imag * sin(a);
       // imagComponent = -val_real * sin(a) + val_imag * cos(a);
-      auto cosAngle = builder.create<math::CosOp>(loc, angle);
-      auto sinAngle = builder.create<math::SinOp>(loc, angle);
+      auto cosAngle = math::CosOp::create(builder, loc, angle);
+      auto sinAngle = math::SinOp::create(builder, loc, angle);

-      auto rcos = builder.create<arith::MulFOp>(loc, valReal, cosAngle);
-      auto rsin = builder.create<arith::MulFOp>(loc, valImag, sinAngle);
-      auto realComponent = builder.create<arith::AddFOp>(loc, rcos, rsin);
+      auto rcos = arith::MulFOp::create(builder, loc, valReal, cosAngle);
+      auto rsin = arith::MulFOp::create(builder, loc, valImag, sinAngle);
+      auto realComponent = arith::AddFOp::create(builder, loc, rcos, rsin);

-      auto icos = builder.create<arith::MulFOp>(loc, valImag, cosAngle);
-      auto isin = builder.create<arith::MulFOp>(loc, valReal, sinAngle);
+      auto icos = arith::MulFOp::create(builder, loc, valImag, cosAngle);
+      auto isin = arith::MulFOp::create(builder, loc, valReal, sinAngle);

-      auto imagComponent = builder.create<arith::SubFOp>(loc, icos, isin);
+      auto imagComponent = arith::SubFOp::create(builder, loc, icos, isin);

       // outReal = sumReal + realComponent
       // outImag = sumImag - imagComponent
-      auto outReal = builder.create<arith::AddFOp>(loc, sumReal, realComponent);
-      auto outImag = builder.create<arith::AddFOp>(loc, sumImag, imagComponent);
+      auto outReal =
+          arith::AddFOp::create(builder, loc, sumReal, realComponent);
+      auto outImag =
+          arith::AddFOp::create(builder, loc, sumImag, imagComponent);

-      builder.create<linalg::YieldOp>(loc, ValueRange{outReal, outImag});
+      linalg::YieldOp::create(builder, loc, ValueRange{outReal, outImag});
     };

     rewriter.replaceOpWithNewOp<linalg::GenericOp>(