aboutsummaryrefslogtreecommitdiff
path: root/flang
diff options
context:
space:
mode:
authorKelvin Li <kli@ca.ibm.com>2023-07-10 23:13:34 -0400
committerKelvin Li <kli@ca.ibm.com>2023-07-24 14:03:50 -0400
commit447c3a358d080a7247b487a9b44edc2ee42f09b6 (patch)
treeeed10f436873de4e76a2ab181d209537b24c3d54 /flang
parentce89048b12585aeae7772006c4cec274ba06e01f (diff)
downloadllvm-447c3a358d080a7247b487a9b44edc2ee42f09b6.zip
llvm-447c3a358d080a7247b487a9b44edc2ee42f09b6.tar.gz
llvm-447c3a358d080a7247b487a9b44edc2ee42f09b6.tar.bz2
[flang] Add PowerPC vec_abs, vec_nmadd, vec_msub and vec_sel intrinsics
Co-authored-by: Paul Scoropan <1paulscoropan@gmail.com> Differential Revision: https://reviews.llvm.org/D154985
Diffstat (limited to 'flang')
-rw-r--r--flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h28
-rw-r--r--flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp174
-rw-r--r--flang/module/__ppc_intrinsics.f9099
-rw-r--r--flang/test/Lower/PowerPC/ppc-vec_abs.f90131
-rw-r--r--flang/test/Lower/PowerPC/ppc-vec_max-min-madd-nmsub.f90124
-rw-r--r--flang/test/Lower/PowerPC/ppc-vec_sel.f90492
6 files changed, 1047 insertions, 1 deletions
diff --git a/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h b/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
index 83079116..5c62b4f 100644
--- a/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
@@ -18,6 +18,7 @@ namespace fir {
/// Enums used to templatize vector intrinsic function generators. Enum does
/// not contain every vector intrinsic, only intrinsics that share generators.
enum class VecOp {
+ Abs,
Add,
And,
Anyge,
@@ -28,7 +29,10 @@ enum class VecOp {
Convert,
Ctf,
Cvf,
+ Nmadd,
+ Msub,
Mul,
+ Sel,
Sl,
Sld,
Sldw,
@@ -94,6 +98,20 @@ convertVecArgs(fir::FirOpBuilder &builder, mlir::Location loc,
return newArgs;
}
+// This overload method is used only if arguments are of different types.
+static inline llvm::SmallVector<mlir::Value, 4>
+convertVecArgs(fir::FirOpBuilder &builder, mlir::Location loc,
+ llvm::SmallVectorImpl<VecTypeInfo> &vecTyInfo,
+ llvm::SmallVector<mlir::Value, 4> args) {
+ llvm::SmallVector<mlir::Value, 4> newArgs;
+ for (size_t i = 0; i < args.size(); i++) {
+ mlir::Type ty{vecTyInfo[i].toMlirVectorType(builder.getContext())};
+ assert(ty && "unknown mlir vector type");
+ newArgs.push_back(builder.createConvert(loc, ty, args[i]));
+ }
+ return newArgs;
+}
+
struct PPCIntrinsicLibrary : IntrinsicLibrary {
// Constructors.
@@ -106,6 +124,9 @@ struct PPCIntrinsicLibrary : IntrinsicLibrary {
template <bool isImm>
void genMtfsf(llvm::ArrayRef<fir::ExtendedValue>);
+ fir::ExtendedValue genVecAbs(mlir::Type resultType,
+ llvm::ArrayRef<fir::ExtendedValue> args);
+
template <VecOp>
fir::ExtendedValue
genVecAddAndMulSubXor(mlir::Type resultType,
@@ -124,8 +145,15 @@ struct PPCIntrinsicLibrary : IntrinsicLibrary {
llvm::ArrayRef<fir::ExtendedValue> args);
template <VecOp>
+ fir::ExtendedValue genVecNmaddMsub(mlir::Type resultType,
+ llvm::ArrayRef<fir::ExtendedValue> args);
+
+ template <VecOp>
fir::ExtendedValue genVecShift(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
+
+ fir::ExtendedValue genVecSel(mlir::Type resultType,
+ llvm::ArrayRef<fir::ExtendedValue> args);
};
const IntrinsicHandler *findPPCIntrinsicHandler(llvm::StringRef name);
diff --git a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
index 0c949f9..19be56d 100644
--- a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
@@ -33,6 +33,10 @@ static constexpr IntrinsicHandler ppcHandlers[]{
static_cast<IntrinsicLibrary::SubroutineGenerator>(&PI::genMtfsf<true>),
{{{"bf", asValue}, {"i", asValue}}},
/*isElemental=*/false},
+ {"__ppc_vec_abs",
+ static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecAbs),
+ {{{"arg1", asValue}}},
+ /*isElemental=*/true},
{"__ppc_vec_add",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecAddAndMulSubXor<VecOp::Add>),
@@ -83,11 +87,25 @@ static constexpr IntrinsicHandler ppcHandlers[]{
&PI::genVecConvert<VecOp::Cvf>),
{{{"arg1", asValue}}},
/*isElemental=*/true},
+ {"__ppc_vec_msub",
+ static_cast<IntrinsicLibrary::ExtendedGenerator>(
+ &PI::genVecNmaddMsub<VecOp::Msub>),
+ {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
+ /*isElemental=*/true},
{"__ppc_vec_mul",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecAddAndMulSubXor<VecOp::Mul>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
+ {"__ppc_vec_nmadd",
+ static_cast<IntrinsicLibrary::ExtendedGenerator>(
+ &PI::genVecNmaddMsub<VecOp::Nmadd>),
+ {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
+ /*isElemental=*/true},
+ {"__ppc_vec_sel",
+ static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecSel),
+ {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
+ /*isElemental=*/true},
{"__ppc_vec_sl",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecShift<VecOp::Sl>),
@@ -337,6 +355,79 @@ void PPCIntrinsicLibrary::genMtfsf(llvm::ArrayRef<fir::ExtendedValue> args) {
builder.create<fir::CallOp>(loc, funcOp, scalarArgs);
}
+// VEC_ABS
+fir::ExtendedValue
+PPCIntrinsicLibrary::genVecAbs(mlir::Type resultType,
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 1);
+ auto context{builder.getContext()};
+ auto argBases{getBasesForArgs(args)};
+ auto vTypeInfo{getVecTypeFromFir(argBases[0])};
+
+ mlir::func::FuncOp funcOp{nullptr};
+ mlir::FunctionType ftype;
+ llvm::StringRef fname{};
+ if (vTypeInfo.isFloat()) {
+ if (vTypeInfo.isFloat32()) {
+ fname = "llvm.fabs.v4f32";
+ ftype =
+ genFuncType<Ty::RealVector<4>, Ty::RealVector<4>>(context, builder);
+ } else if (vTypeInfo.isFloat64()) {
+ fname = "llvm.fabs.v2f64";
+ ftype =
+ genFuncType<Ty::RealVector<8>, Ty::RealVector<8>>(context, builder);
+ }
+
+ funcOp = builder.addNamedFunction(loc, fname, ftype);
+ auto callOp{builder.create<fir::CallOp>(loc, funcOp, argBases[0])};
+ return callOp.getResult(0);
+ } else if (auto eleTy = vTypeInfo.eleTy.dyn_cast<mlir::IntegerType>()) {
+ // vec_abs(arg1) = max(0 - arg1, arg1)
+
+ auto newVecTy{mlir::VectorType::get(vTypeInfo.len, eleTy)};
+ auto varg1{builder.createConvert(loc, newVecTy, argBases[0])};
+ // construct vector(0,..)
+ auto zeroVal{builder.createIntegerConstant(loc, eleTy, 0)};
+ auto vZero{
+ builder.create<mlir::vector::BroadcastOp>(loc, newVecTy, zeroVal)};
+ auto zeroSubVarg1{builder.create<mlir::arith::SubIOp>(loc, vZero, varg1)};
+
+ mlir::func::FuncOp funcOp{nullptr};
+ switch (eleTy.getWidth()) {
+ case 8:
+ fname = "llvm.ppc.altivec.vmaxsb";
+ ftype = genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>,
+ Ty::IntegerVector<1>>(context, builder);
+ break;
+ case 16:
+ fname = "llvm.ppc.altivec.vmaxsh";
+ ftype = genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>,
+ Ty::IntegerVector<2>>(context, builder);
+ break;
+ case 32:
+ fname = "llvm.ppc.altivec.vmaxsw";
+ ftype = genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
+ Ty::IntegerVector<4>>(context, builder);
+ break;
+ case 64:
+ fname = "llvm.ppc.altivec.vmaxsd";
+ ftype = genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>,
+ Ty::IntegerVector<8>>(context, builder);
+ break;
+ default:
+ llvm_unreachable("invalid integer size");
+ }
+ funcOp = builder.addNamedFunction(loc, fname, ftype);
+
+ mlir::Value args[] = {zeroSubVarg1, varg1};
+ auto callOp{builder.create<fir::CallOp>(loc, funcOp, args)};
+ return builder.createConvert(loc, argBases[0].getType(),
+ callOp.getResult(0));
+ }
+
+ llvm_unreachable("unknown vector type");
+}
+
// VEC_ADD, VEC_AND, VEC_SUB, VEC_MUL, VEC_XOR
template <VecOp vop>
fir::ExtendedValue PPCIntrinsicLibrary::genVecAddAndMulSubXor(
@@ -826,6 +917,89 @@ PPCIntrinsicLibrary::genVecConvert(mlir::Type resultType,
}
}
+// VEC_NMADD, VEC_MSUB
+template <VecOp vop>
+fir::ExtendedValue
+PPCIntrinsicLibrary::genVecNmaddMsub(mlir::Type resultType,
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 3);
+ auto context{builder.getContext()};
+ auto argBases{getBasesForArgs(args)};
+ auto vTypeInfo{getVecTypeFromFir(argBases[0])};
+ auto newArgs{convertVecArgs(builder, loc, vTypeInfo, argBases)};
+ const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()};
+
+ static std::map<int, std::pair<llvm::StringRef, mlir::FunctionType>> fmaMap{
+ {32,
+ std::make_pair(
+ "llvm.fma.v4f32",
+ genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>(
+ context, builder))},
+ {64,
+ std::make_pair(
+ "llvm.fma.v2f64",
+ genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>(
+ context, builder))}};
+
+ auto funcOp{builder.addNamedFunction(loc, std::get<0>(fmaMap[width]),
+ std::get<1>(fmaMap[width]))};
+ if (vop == VecOp::Nmadd) {
+ // vec_nmadd(arg1, arg2, arg3) = -fma(arg1, arg2, arg3)
+ auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};
+
+ // We need to convert fir.vector to MLIR vector to use fneg and then back
+ // to fir.vector to store.
+ auto vCall{builder.createConvert(loc, vTypeInfo.toMlirVectorType(context),
+ callOp.getResult(0))};
+ auto neg{builder.create<mlir::arith::NegFOp>(loc, vCall)};
+ return builder.createConvert(loc, vTypeInfo.toFirVectorType(), neg);
+ } else if (vop == VecOp::Msub) {
+ // vec_msub(arg1, arg2, arg3) = fma(arg1, arg2, -arg3)
+ newArgs[2] = builder.create<mlir::arith::NegFOp>(loc, newArgs[2]);
+
+ auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};
+ return callOp.getResult(0);
+ }
+ llvm_unreachable("Invalid vector operation for generator");
+}
+
+// VEC_SEL
+fir::ExtendedValue
+PPCIntrinsicLibrary::genVecSel(mlir::Type resultType,
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 3);
+ auto argBases{getBasesForArgs(args)};
+ llvm::SmallVector<VecTypeInfo, 4> vecTyInfos;
+ for (size_t i = 0; i < argBases.size(); i++) {
+ vecTyInfos.push_back(getVecTypeFromFir(argBases[i]));
+ }
+ auto vargs{convertVecArgs(builder, loc, vecTyInfos, argBases)};
+
+ auto i8Ty{mlir::IntegerType::get(builder.getContext(), 8)};
+ auto negOne{builder.createIntegerConstant(loc, i8Ty, -1)};
+
+ // construct a constant <16 x i8> vector with value -1 for bitcast
+ auto bcVecTy{mlir::VectorType::get(16, i8Ty)};
+ auto vNegOne{builder.create<mlir::vector::BroadcastOp>(loc, bcVecTy, negOne)};
+
+ // bitcast arguments to bcVecTy
+ auto arg1{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[0])};
+ auto arg2{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[1])};
+ auto arg3{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[2])};
+
+ // vec_sel(arg1, arg2, arg3) =
+ // (arg2 and arg3) or (arg1 and (arg3 xor vector(-1,...)))
+ auto comp{builder.create<mlir::arith::XOrIOp>(loc, arg3, vNegOne)};
+ auto a1AndComp{builder.create<mlir::arith::AndIOp>(loc, arg1, comp)};
+ auto a1OrA2{builder.create<mlir::arith::AndIOp>(loc, arg2, arg3)};
+ auto res{builder.create<mlir::arith::OrIOp>(loc, a1AndComp, a1OrA2)};
+
+ auto bcRes{
+ builder.create<mlir::vector::BitCastOp>(loc, vargs[0].getType(), res)};
+
+ return builder.createConvert(loc, vecTyInfos[0].toFirVectorType(), bcRes);
+}
+
// VEC_SL, VEC_SLD, VEC_SLDW, VEC_SLL, VEC_SLO, VEC_SR, VEC_SRL, VEC_SRO
template <VecOp vop>
fir::ExtendedValue
diff --git a/flang/module/__ppc_intrinsics.f90 b/flang/module/__ppc_intrinsics.f90
index 17f6180..58d2c81 100644
--- a/flang/module/__ppc_intrinsics.f90
+++ b/flang/module/__ppc_intrinsics.f90
@@ -23,15 +23,26 @@ module __ppc_intrinsics
! Vector intrinsic
!--------------------
!! ================ 1 argument function interface ================
+! vector(i) function f(vector(i))
+#define ELEM_FUNC_VIVI(VKIND) \
+ elemental vector(integer(VKIND)) function elem_func_vi##VKIND##vi##VKIND(arg1); \
+ vector(integer(VKIND)), intent(in) :: arg1; \
+ end function ;
+
! vector(r) function f(vector(r))
#define ELEM_FUNC_VRVR_2(VKIND1, VKIND2) \
elemental vector(real(VKIND1)) function elem_func_vr##VKIND1##vr##VKIND2(arg1); \
vector(real(VKIND2)), intent(in) :: arg1; \
end function ;
+#define ELEM_FUNC_VRVR(VKIND) ELEM_FUNC_VRVR_2(VKIND, VKIND)
+ ELEM_FUNC_VIVI(1) ELEM_FUNC_VIVI(2) ELEM_FUNC_VIVI(4) ELEM_FUNC_VIVI(8)
ELEM_FUNC_VRVR_2(4,8) ELEM_FUNC_VRVR_2(8,4)
+ ELEM_FUNC_VRVR(4) ELEM_FUNC_VRVR(8)
+#undef ELEM_FUNC_VRVR
#undef ELEM_FUNC_VRVR_2
+#undef ELEM_FUNC_VIVI
!! ================ 2 arguments function interface ================
! vector(i) function f(vector(i), vector(i))
@@ -193,6 +204,27 @@ module __ppc_intrinsics
vector(real(VKIND)), intent(in) :: arg1, arg2, arg3; \
end function ;
+! vector(i) function f(vector(i), vector(i), vector(u))
+#define ELEM_FUNC_VIVIVIVU(VKIND) \
+ elemental vector(integer(VKIND)) function elem_func_vi##VKIND##vi##VKIND##vi##VKIND##vu##VKIND(arg1, arg2, arg3); \
+ vector(integer(VKIND)), intent(in) :: arg1, arg2; \
+ vector(unsigned(VKIND)), intent(in) :: arg3; \
+ end function ;
+
+! vector(u) function f(vector(u), vector(u), vector(u))
+#define ELEM_FUNC_VUVUVUVU(VKIND) \
+ elemental vector(unsigned(VKIND)) function elem_func_vu##VKIND##vu##VKIND##vu##VKIND##vu##VKIND(arg1, arg2, arg3); \
+ vector(unsigned(VKIND)), intent(in) :: arg1, arg2, arg3; \
+ end function ;
+
+! vector(r) function f(vector(r), vector(r), vector(u))
+#define ELEM_FUNC_VRVRVRVU(VKIND) \
+ elemental vector(real(VKIND)) function elem_func_vr##VKIND##vr##VKIND##vr##VKIND##vu##VKIND(arg1, arg2, arg3); \
+ vector(real(VKIND)), intent(in) :: arg1, arg2; \
+ vector(unsigned(VKIND)), intent(in) :: arg3; \
+ end function ;
+
+
! vector(i) function f(vector(i), vector(i), i)
#define ELEM_FUNC_VIVIVII(VKIND) \
elemental vector(integer(VKIND)) function elem_func_vi##VKIND##vi##VKIND##vi##VKIND##i(arg1, arg2, arg3); \
@@ -217,6 +249,9 @@ module __ppc_intrinsics
!dir$ ignore_tkr(k) arg3; \
end function ;
+ ELEM_FUNC_VIVIVIVU(1) ELEM_FUNC_VIVIVIVU(2) ELEM_FUNC_VIVIVIVU(4) ELEM_FUNC_VIVIVIVU(8)
+ ELEM_FUNC_VUVUVUVU(1) ELEM_FUNC_VUVUVUVU(2) ELEM_FUNC_VUVUVUVU(4) ELEM_FUNC_VUVUVUVU(8)
+ ELEM_FUNC_VRVRVRVU(4) ELEM_FUNC_VRVRVRVU(8)
ELEM_FUNC_VRVRVRVR(4) ELEM_FUNC_VRVRVRVR(8)
ELEM_FUNC_VIVIVII(1) ELEM_FUNC_VIVIVII(2) ELEM_FUNC_VIVIVII(4) ELEM_FUNC_VIVIVII(8)
ELEM_FUNC_VUVUVUI(1) ELEM_FUNC_VUVUVUI(2) ELEM_FUNC_VUVUVUI(4) ELEM_FUNC_VUVUVUI(8)
@@ -226,6 +261,10 @@ module __ppc_intrinsics
#undef ELEM_FUNC_VUVUVUI
#undef ELEM_FUNC_VRVRVRI
#undef ELEM_FUNC_VRVRVRVR
+#undef ELEM_FUNC_VRVRVRVU
+#undef ELEM_FUNC_VRVRVRVR
+#undef ELEM_FUNC_VUVUVUVU
+#undef ELEM_FUNC_VIVIVIVU
end interface
@@ -387,10 +426,24 @@ module __ppc_intrinsics
!-------------------------
! vector function(vector)
!-------------------------
+#define VI_VI(NAME, VKIND) __ppc_##NAME##_vi##VKIND##vi##VKIND
#define VR_VR_2(NAME, VKIND1, VKIND2) __ppc_##NAME##_vr##VKIND1##vr##VKIND2
+#define VR_VR(NAME, VKIND) VR_VR_2(NAME, VKIND, VKIND)
+#define VEC_VI_VI(NAME, VKIND) \
+ procedure(elem_func_vi##VKIND##vi##VKIND) :: VI_VI(NAME, VKIND);
#define VEC_VR_VR_2(NAME, VKIND1, VKIND2) \
procedure(elem_func_vr##VKIND1##vr##VKIND2) :: VR_VR_2(NAME, VKIND1, VKIND2);
+#define VEC_VR_VR(NAME, VKIND) VEC_VR_VR_2(NAME, VKIND, VKIND)
+
+! vec_abs
+ VEC_VI_VI(vec_abs,1) VEC_VI_VI(vec_abs,2) VEC_VI_VI(vec_abs,4) VEC_VI_VI(vec_abs,8)
+ VEC_VR_VR(vec_abs,4) VEC_VR_VR(vec_abs,8)
+ interface vec_abs
+ procedure :: VI_VI(vec_abs,1), VI_VI(vec_abs,2), VI_VI(vec_abs,4), VI_VI(vec_abs,8)
+ procedure :: VR_VR(vec_abs,4), VR_VR(vec_abs,8)
+ end interface vec_abs
+ public :: vec_abs
! vec_cvf
VEC_VR_VR_2(vec_cvf,4,8) VEC_VR_VR_2(vec_cvf,8,4)
@@ -399,9 +452,13 @@ module __ppc_intrinsics
end interface vec_cvf
public :: vec_cvf
+#undef VEC_VR_VR
#undef VEC_VR_VR_2
+#undef VEC_VI_VI
+#undef VR_VR
#undef VR_VR_2
-
+#undef VI_VI
+
!---------------------------------
! vector function(vector, vector)
!---------------------------------
@@ -687,9 +744,18 @@ module __ppc_intrinsics
! vector function(vector, vector, vector)
!-----------------------------------------
#define VR_VR_VR_VR(NAME, VKIND) __ppc_##NAME##_vr##VKIND##vr##VKIND##vr##VKIND##vr##VKIND
+#define VI_VI_VI_VU(NAME, VKIND) __ppc_##NAME##_vi##VKIND##vi##VKIND##vi##VKIND##vu##VKIND
+#define VU_VU_VU_VU(NAME, VKIND) __ppc_##NAME##_vu##VKIND##vu##VKIND##vu##VKIND##vu##VKIND
+#define VR_VR_VR_VU(NAME, VKIND) __ppc_##NAME##_vr##VKIND##vr##VKIND##vr##VKIND##vu##VKIND
#define VEC_VR_VR_VR_VR(NAME, VKIND) \
procedure(elem_func_vr##VKIND##vr##VKIND##vr##VKIND##vr##VKIND) :: VR_VR_VR_VR(NAME, VKIND);
+#define VEC_VI_VI_VI_VU(NAME, VKIND) \
+ procedure(elem_func_vi##VKIND##vi##VKIND##vi##VKIND##vu##VKIND) :: VI_VI_VI_VU(NAME, VKIND);
+#define VEC_VU_VU_VU_VU(NAME, VKIND) \
+ procedure(elem_func_vu##VKIND##vu##VKIND##vu##VKIND##vu##VKIND) :: VU_VU_VU_VU(NAME, VKIND);
+#define VEC_VR_VR_VR_VU(NAME, VKIND) \
+ procedure(elem_func_vr##VKIND##vr##VKIND##vr##VKIND##vu##VKIND) :: VR_VR_VR_VU(NAME, VKIND);
! vec_madd
VEC_VR_VR_VR_VR(vec_madd,4) VEC_VR_VR_VR_VR(vec_madd,8)
@@ -698,6 +764,20 @@ module __ppc_intrinsics
end interface vec_madd
public :: vec_madd
+! vec_msub
+ VEC_VR_VR_VR_VR(vec_msub,4) VEC_VR_VR_VR_VR(vec_msub,8)
+ interface vec_msub
+ procedure :: VR_VR_VR_VR(vec_msub,4), VR_VR_VR_VR(vec_msub,8)
+ end interface vec_msub
+ public :: vec_msub
+
+! vec_nmadd
+ VEC_VR_VR_VR_VR(vec_nmadd,4) VEC_VR_VR_VR_VR(vec_nmadd,8)
+ interface vec_nmadd
+ procedure :: VR_VR_VR_VR(vec_nmadd,4), VR_VR_VR_VR(vec_nmadd,8)
+ end interface vec_nmadd
+ public :: vec_nmadd
+
! vec_nmsub
VEC_VR_VR_VR_VR(vec_nmsub,4) VEC_VR_VR_VR_VR(vec_nmsub,8)
interface vec_nmsub
@@ -705,7 +785,24 @@ module __ppc_intrinsics
end interface vec_nmsub
public :: vec_nmsub
+! vec_sel
+ VEC_VI_VI_VI_VU(vec_sel,1) VEC_VI_VI_VI_VU(vec_sel,2) VEC_VI_VI_VI_VU(vec_sel,4) VEC_VI_VI_VI_VU(vec_sel,8)
+ VEC_VU_VU_VU_VU(vec_sel,1) VEC_VU_VU_VU_VU(vec_sel,2) VEC_VU_VU_VU_VU(vec_sel,4) VEC_VU_VU_VU_VU(vec_sel,8)
+ VEC_VR_VR_VR_VU(vec_sel,4) VEC_VR_VR_VR_VU(vec_sel,8)
+ interface vec_sel
+ procedure :: VI_VI_VI_VU(vec_sel,1), VI_VI_VI_VU(vec_sel,2), VI_VI_VI_VU(vec_sel,4), VI_VI_VI_VU(vec_sel,8)
+ procedure :: VU_VU_VU_VU(vec_sel,1), VU_VU_VU_VU(vec_sel,2), VU_VU_VU_VU(vec_sel,4), VU_VU_VU_VU(vec_sel,8)
+ procedure :: VR_VR_VR_VU(vec_sel,4), VR_VR_VR_VU(vec_sel,8)
+ end interface vec_sel
+ public :: vec_sel
+
+#undef VEC_VI_VI_VI_VU
+#undef VEC_VU_VU_VU_VU
+#undef VEC_VR_VR_VR_VU
#undef VEC_VR_VR_VR_VR
+#undef VI_VI_VI_VU
+#undef VU_VU_VU_VU
+#undef VR_VR_VR_VU
#undef VR_VR_VR_VR
!----------------------------------
diff --git a/flang/test/Lower/PowerPC/ppc-vec_abs.f90 b/flang/test/Lower/PowerPC/ppc-vec_abs.f90
new file mode 100644
index 0000000..d7fdd04
--- /dev/null
+++ b/flang/test/Lower/PowerPC/ppc-vec_abs.f90
@@ -0,0 +1,131 @@
+! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s
+! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s
+! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
+! REQUIRES: target=powerpc{{.*}}
+
+!----------------------
+! vec_abs
+!----------------------
+
+! CHECK-LABEL: vec_abs_i1
+subroutine vec_abs_i1(arg1)
+ vector(integer(1)) :: arg1, r
+ r = vec_abs(arg1)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
+! CHECK-FIR: %[[zero:.*]] = arith.constant 0 : i8
+! CHECK-FIR: %[[vzero:.*]] = vector.broadcast %[[zero]] : i8 to vector<16xi8>
+! CHECK-FIR: %[[sub:.*]] = arith.subi %[[vzero]], %[[varg1]] : vector<16xi8>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vmaxsb(%[[sub]], %[[varg1]]) fastmath<contract> : (vector<16xi8>, vector<16xi8>) -> !fir.vector<16:i8>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
+! CHECK-LLVMIR: %{{.*}} = llvm.mlir.constant(0 : i8) : i8
+! CHECK-LLVMIR: %[[vzero:.*]] = llvm.mlir.constant(dense<0> : vector<16xi8>) : vector<16xi8>
+! CHECK-LLVMIR: %[[sub:.*]] = llvm.sub %[[vzero]], %[[arg1]] : vector<16xi8>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vmaxsb(%[[sub]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, vector<16xi8>) -> vector<16xi8>
+
+! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! CHECK: %[[sub:.*]] = sub <16 x i8> zeroinitializer, %[[arg1]]
+! CHECK: %{{[0-9]+}} = call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %[[sub]], <16 x i8> %[[arg1]])
+end subroutine vec_abs_i1
+
+! CHECK-LABEL: vec_abs_i2
+subroutine vec_abs_i2(arg1)
+ vector(integer(2)) :: arg1, r
+ r = vec_abs(arg1)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
+! CHECK-FIR: %[[zero:.*]] = arith.constant 0 : i16
+! CHECK-FIR: %[[vzero:.*]] = vector.broadcast %[[zero]] : i16 to vector<8xi16>
+! CHECK-FIR: %[[sub:.*]] = arith.subi %[[vzero]], %[[varg1]] : vector<8xi16>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vmaxsh(%[[sub]], %[[varg1]]) fastmath<contract> : (vector<8xi16>, vector<8xi16>) -> !fir.vector<8:i16>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %{{.*}} = llvm.mlir.constant(0 : i16) : i16
+! CHECK-LLVMIR: %[[vzero:.*]] = llvm.mlir.constant(dense<0> : vector<8xi16>) : vector<8xi16>
+! CHECK-LLVMIR: %[[sub:.*]] = llvm.sub %[[vzero]], %[[arg1]] : vector<8xi16>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vmaxsh(%[[sub]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, vector<8xi16>) -> vector<8xi16>
+
+! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! CHECK: %[[sub:.*]] = sub <8 x i16> zeroinitializer, %[[arg1]]
+! CHECK: %{{[0-9]+}} = call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %[[sub]], <8 x i16> %[[arg1]])
+end subroutine vec_abs_i2
+
+! CHECK-LABEL: vec_abs_i4
+subroutine vec_abs_i4(arg1)
+ vector(integer(4)) :: arg1, r
+ r = vec_abs(arg1)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
+! CHECK-FIR: %[[zero:.*]] = arith.constant 0 : i32
+! CHECK-FIR: %[[vzero:.*]] = vector.broadcast %[[zero]] : i32 to vector<4xi32>
+! CHECK-FIR: %[[sub:.*]] = arith.subi %[[vzero]], %[[varg1]] : vector<4xi32>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vmaxsw(%[[sub]], %[[varg1]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %{{.*}} = llvm.mlir.constant(0 : i32) : i32
+! CHECK-LLVMIR: %[[vzero:.*]] = llvm.mlir.constant(dense<0> : vector<4xi32>) : vector<4xi32>
+! CHECK-LLVMIR: %[[sub:.*]] = llvm.sub %[[vzero]], %[[arg1]] : vector<4xi32>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vmaxsw(%[[sub]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %[[sub:.*]] = sub <4 x i32> zeroinitializer, %[[arg1]]
+! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %[[sub]], <4 x i32> %[[arg1]])
+end subroutine vec_abs_i4
+
+! CHECK-LABEL: vec_abs_i8
+subroutine vec_abs_i8(arg1)
+ vector(integer(8)) :: arg1, r
+ r = vec_abs(arg1)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
+! CHECK-FIR: %[[zero:.*]] = arith.constant 0 : i64
+! CHECK-FIR: %[[vzero:.*]] = vector.broadcast %[[zero]] : i64 to vector<2xi64>
+! CHECK-FIR: %[[sub:.*]] = arith.subi %[[vzero]], %[[varg1]] : vector<2xi64>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vmaxsd(%[[sub]], %[[varg1]]) fastmath<contract> : (vector<2xi64>, vector<2xi64>) -> !fir.vector<2:i64>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
+! CHECK-LLVMIR: %{{.*}} = llvm.mlir.constant(0 : i64) : i64
+! CHECK-LLVMIR: %[[vzero:.*]] = llvm.mlir.constant(dense<0> : vector<2xi64>) : vector<2xi64>
+! CHECK-LLVMIR: %[[sub:.*]] = llvm.sub %[[vzero]], %[[arg1]] : vector<2xi64>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vmaxsd(%[[sub]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xi64>, vector<2xi64>) -> vector<2xi64>
+
+! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! CHECK: %[[sub:.*]] = sub <2 x i64> zeroinitializer, %[[arg1]]
+! CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64> %[[sub]], <2 x i64> %[[arg1]])
+end subroutine vec_abs_i8
+
+! CHECK-LABEL: vec_abs_r4
+subroutine vec_abs_r4(arg1)
+ vector(real(4)) :: arg1, r
+ r = vec_abs(arg1)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.fabs.v4f32(%[[arg1]]) fastmath<contract> : (!fir.vector<4:f32>) -> !fir.vector<4:f32>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.fabs.v4f32(%[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xf32>) -> vector<4xf32>
+
+! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! CHECK: %{{[0-9]+}} = call contract <4 x float> @llvm.fabs.v4f32(<4 x float> %[[arg1]])
+end subroutine vec_abs_r4
+
+! CHECK-LABEL: vec_abs_r8
+subroutine vec_abs_r8(arg1)
+ vector(real(8)) :: arg1, r
+ r = vec_abs(arg1)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.fabs.v2f64(%[[arg1]]) fastmath<contract> : (!fir.vector<2:f64>) -> !fir.vector<2:f64>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.fabs.v2f64(%[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xf64>) -> vector<2xf64>
+
+! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! CHECK: %{{[0-9]+}} = call contract <2 x double> @llvm.fabs.v2f64(<2 x double> %[[arg1]])
+end subroutine vec_abs_r8
+
diff --git a/flang/test/Lower/PowerPC/ppc-vec_max-min-madd-nmsub.f90 b/flang/test/Lower/PowerPC/ppc-vec_max-min-madd-nmsub.f90
index a65febf..1f95223 100644
--- a/flang/test/Lower/PowerPC/ppc-vec_max-min-madd-nmsub.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec_max-min-madd-nmsub.f90
@@ -502,3 +502,127 @@ subroutine vec_nmsub_testf64(x, y, z)
! CHECK: %[[vnmsub:.*]] = call contract <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %[[x]], <2 x double> %[[y]], <2 x double> %[[z]])
! CHECK: store <2 x double> %[[vnmsub]], ptr %{{[0-9]}}, align 16
end subroutine vec_nmsub_testf64
+
+! vec_msub
+
+! CHECK-LABEL: vec_msub_testf32
+subroutine vec_msub_testf32(x, y, z)
+ vector(real(4)) :: vmsub, x, y, z
+ vmsub = vec_msub(x, y, z)
+! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! CHECK-FIR: %[[vz:.*]] = fir.convert %[[z]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! CHECK-FIR: %[[nz:.*]] = arith.negf %[[vz]] fastmath<contract> : vector<4xf32>
+! CHECK-FIR: %[[vmsub:.*]] = fir.call @llvm.fma.v4f32(%[[vx]], %[[vy]], %[[nz]]) fastmath<contract> : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> !fir.vector<4:f32>
+! CHECK-FIR: fir.store %[[vmsub]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
+
+! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xf32>>
+! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xf32>>
+! CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr<vector<4xf32>>
+! CHECK-LLVMIR: %[[nz:.*]] = llvm.fneg %[[z]] {fastmathFlags = #llvm.fastmath<contract>} : vector<4xf32>
+! CHECK-LLVMIR: %[[vmsub:.*]] = llvm.call @llvm.fma.v4f32(%[[x]], %[[y]], %[[nz]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32>
+! CHECK-LLVMIR: llvm.store %[[vmsub]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>
+
+! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! CHECK: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! CHECK: %[[z:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! CHECK: %[[nz:.*]] = fneg contract <4 x float> %[[z]]
+! CHECK: %[[vmsub:.*]] = call contract <4 x float> @llvm.fma.v4f32(<4 x float> %[[x]], <4 x float> %[[y]], <4 x float> %[[nz]])
+! CHECK: store <4 x float> %[[vmsub]], ptr %{{[0-9]}}, align 16
+end subroutine vec_msub_testf32
+
+! CHECK-LABEL: vec_msub_testf64
+subroutine vec_msub_testf64(x, y, z)
+ vector(real(8)) :: vmsub, x, y, z
+ vmsub = vec_msub(x, y, z)
+! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
+! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:f64>>
+! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref<!fir.vector<2:f64>>
+! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64>
+! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<2:f64>) -> vector<2xf64>
+! CHECK-FIR: %[[vz:.*]] = fir.convert %[[z]] : (!fir.vector<2:f64>) -> vector<2xf64>
+! CHECK-FIR: %[[nz:.*]] = arith.negf %[[vz]] fastmath<contract> : vector<2xf64>
+! CHECK-FIR: %[[vmsub:.*]] = fir.call @llvm.fma.v2f64(%[[vx]], %[[vy]], %[[nz]]) fastmath<contract> : (vector<2xf64>, vector<2xf64>, vector<2xf64>) -> !fir.vector<2:f64>
+! CHECK-FIR: fir.store %[[vmsub]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
+
+! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xf64>>
+! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xf64>>
+! CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr<vector<2xf64>>
+! CHECK-LLVMIR: %[[nz:.*]] = llvm.fneg %[[z]] {fastmathFlags = #llvm.fastmath<contract>} : vector<2xf64>
+! CHECK-LLVMIR: %[[vmsub:.*]] = llvm.call @llvm.fma.v2f64(%[[x]], %[[y]], %[[nz]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xf64>, vector<2xf64>, vector<2xf64>) -> vector<2xf64>
+! CHECK-LLVMIR: llvm.store %[[vmsub]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>
+
+! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! CHECK: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! CHECK: %[[z:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! CHECK: %[[nz:.*]] = fneg contract <2 x double> %[[z]]
+! CHECK: %[[vmsub:.*]] = call contract <2 x double> @llvm.fma.v2f64(<2 x double> %[[x]], <2 x double> %[[y]], <2 x double> %[[nz]])
+! CHECK: store <2 x double> %[[vmsub]], ptr %{{[0-9]}}, align 16
+end subroutine vec_msub_testf64
+
+! vec_nmadd
+
+! CHECK-LABEL: vec_nmadd_testf32
+subroutine vec_nmadd_testf32(x, y, z)
+ vector(real(4)) :: vnmsum, x, y, z
+ vnmsum = vec_nmadd(x, y, z)
+! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! CHECK-FIR: %[[vz:.*]] = fir.convert %[[z]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! CHECK-FIR: %[[msum:.*]] = fir.call @llvm.fma.v4f32(%[[vx]], %[[vy]], %[[vz]]) fastmath<contract> : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> !fir.vector<4:f32>
+! CHECK-FIR: %[[vmsum:.*]] = fir.convert %[[msum]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! CHECK-FIR: %[[nmsum:.*]] = arith.negf %[[vmsum]] fastmath<contract> : vector<4xf32>
+! CHECK-FIR: %[[vnmsum:.*]] = fir.convert %[[nmsum]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! CHECK-FIR: fir.store %[[vnmsum]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
+
+! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xf32>>
+! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xf32>>
+! CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr<vector<4xf32>>
+! CHECK-LLVMIR: %[[msum:.*]] = llvm.call @llvm.fma.v4f32(%[[x]], %[[y]], %[[z]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32>
+! CHECK-LLVMIR: %[[vnmsum:.*]] = llvm.fneg %[[msum]] {fastmathFlags = #llvm.fastmath<contract>} : vector<4xf32>
+! CHECK-LLVMIR: llvm.store %[[vnmsum]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>
+
+! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! CHECK: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! CHECK: %[[z:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! CHECK: %[[msum:.*]] = call contract <4 x float> @llvm.fma.v4f32(<4 x float> %[[x]], <4 x float> %[[y]], <4 x float> %[[z]])
+! CHECK: %[[vnmsum:.*]] = fneg contract <4 x float> %[[msum]]
+! CHECK: store <4 x float> %[[vnmsum]], ptr %{{[0-9]}}, align 16
+end subroutine vec_nmadd_testf32
+
+! CHECK-LABEL: vec_nmadd_testf64
+subroutine vec_nmadd_testf64(x, y, z)
+ vector(real(8)) :: vnmsum, x, y, z
+ vnmsum = vec_nmadd(x, y, z)
+! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
+! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:f64>>
+! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref<!fir.vector<2:f64>>
+! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64>
+! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<2:f64>) -> vector<2xf64>
+! CHECK-FIR: %[[vz:.*]] = fir.convert %[[z]] : (!fir.vector<2:f64>) -> vector<2xf64>
+! CHECK-FIR: %[[msum:.*]] = fir.call @llvm.fma.v2f64(%[[vx]], %[[vy]], %[[vz]]) fastmath<contract> : (vector<2xf64>, vector<2xf64>, vector<2xf64>) -> !fir.vector<2:f64>
+! CHECK-FIR: %[[vmsum:.*]] = fir.convert %[[msum]] : (!fir.vector<2:f64>) -> vector<2xf64>
+! CHECK-FIR: %[[nmsum:.*]] = arith.negf %[[vmsum]] fastmath<contract> : vector<2xf64>
+! CHECK-FIR: %[[vnmsum:.*]] = fir.convert %[[nmsum]] : (vector<2xf64>) -> !fir.vector<2:f64>
+! CHECK-FIR: fir.store %[[vnmsum]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
+
+! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xf64>>
+! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xf64>>
+! CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr<vector<2xf64>>
+! CHECK-LLVMIR: %[[msum:.*]] = llvm.call @llvm.fma.v2f64(%[[x]], %[[y]], %[[z]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xf64>, vector<2xf64>, vector<2xf64>) -> vector<2xf64>
+! CHECK-LLVMIR: %[[vnmsum:.*]] = llvm.fneg %[[msum]] {fastmathFlags = #llvm.fastmath<contract>} : vector<2xf64>
+! CHECK-LLVMIR: llvm.store %[[vnmsum]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>
+
+! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! CHECK: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! CHECK: %[[z:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! CHECK: %[[msum:.*]] = call contract <2 x double> @llvm.fma.v2f64(<2 x double> %[[x]], <2 x double> %[[y]], <2 x double> %[[z]])
+! CHECK: %[[vnmsum:.*]] = fneg contract <2 x double> %[[msum]]
+! CHECK: store <2 x double> %[[vnmsum]], ptr %{{[0-9]}}, align 16
+end subroutine vec_nmadd_testf64
diff --git a/flang/test/Lower/PowerPC/ppc-vec_sel.f90 b/flang/test/Lower/PowerPC/ppc-vec_sel.f90
new file mode 100644
index 0000000..0802af5
--- /dev/null
+++ b/flang/test/Lower/PowerPC/ppc-vec_sel.f90
@@ -0,0 +1,492 @@
+! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s
+! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s
+! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
+! REQUIRES: target=powerpc{{.*}}
+
+!----------------------
+! vec_sel
+!----------------------
+
+! CHECK-LABEL: vec_sel_testi1
+subroutine vec_sel_testi1(arg1, arg2, arg3)
+ vector(integer(1)) :: arg1, arg2, r
+ vector(unsigned(1)) :: arg3
+ r = vec_sel(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
+! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
+! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
+! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8>
+! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
+! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
+! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<16xi8>
+! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<16xi8>
+! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<16xi8> to vector<16xi8>
+! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
+! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
+! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
+! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
+! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<16xi8>
+! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<16xi8>) -> !fir.vector<16:i8>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
+! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
+! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[c]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[arg1]], %[[xor]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[arg2]], %[[arg3]] : vector<16xi8>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.or %[[and1]], %[[and2]] : vector<16xi8>
+
+! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! CHECK: %[[comp:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! CHECK: %[[and1:.*]] = and <16 x i8> %[[arg1]], %[[comp]]
+! CHECK: %[[and2:.*]] = and <16 x i8> %[[arg2]], %[[arg3]]
+! CHECK: %{{[0-9]+}} = or <16 x i8> %[[and1]], %[[and2]]
+end subroutine vec_sel_testi1
+
+! CHECK-LABEL: vec_sel_testi2
+subroutine vec_sel_testi2(arg1, arg2, arg3)
+ vector(integer(2)) :: arg1, arg2, r
+ vector(unsigned(2)) :: arg3
+ r = vec_sel(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
+! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
+! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
+! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<8:ui16>) -> vector<8xi16>
+! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
+! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
+! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<16xi8>
+! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<16xi8>
+! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<8xi16> to vector<16xi8>
+! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
+! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
+! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
+! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
+! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<8xi16>
+! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<8xi16>) -> !fir.vector<8:i16>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
+! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<16xi8>
+! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<16xi8>
+! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<8xi16> to vector<16xi8>
+! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3]], %[[c]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]] : vector<16xi8>
+! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]] : vector<16xi8>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<8xi16>
+
+! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! CHECK: %[[arg3:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! CHECK: %[[bc1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! CHECK: %[[bc2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! CHECK: %[[bc3:.*]] = bitcast <8 x i16> %[[arg3]] to <16 x i8>
+! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
+! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
+! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
+! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <8 x i16>
+end subroutine vec_sel_testi2
+
+! CHECK-LABEL: vec_sel_testi4
+subroutine vec_sel_testi4(arg1, arg2, arg3)
+ vector(integer(4)) :: arg1, arg2, r
+ vector(unsigned(4)) :: arg3
+ r = vec_sel(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
+! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
+! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<4:ui32>) -> vector<4xi32>
+! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
+! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
+! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<4xi32> to vector<16xi8>
+! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<4xi32> to vector<16xi8>
+! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<4xi32> to vector<16xi8>
+! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
+! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
+! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
+! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
+! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<4xi32>
+! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<4xi32>) -> !fir.vector<4:i32>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
+! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<4xi32> to vector<16xi8>
+! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<4xi32> to vector<16xi8>
+! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<4xi32> to vector<16xi8>
+! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3]], %[[c]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]] : vector<16xi8>
+! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]] : vector<16xi8>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<4xi32>
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %[[arg3:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %[[bc1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! CHECK: %[[bc2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! CHECK: %[[bc3:.*]] = bitcast <4 x i32> %[[arg3]] to <16 x i8>
+! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
+! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
+! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
+! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <4 x i32>
+end subroutine vec_sel_testi4
+
+! CHECK-LABEL: vec_sel_testi8
+subroutine vec_sel_testi8(arg1, arg2, arg3)
+ vector(integer(8)) :: arg1, arg2, r
+ vector(unsigned(8)) :: arg3
+ r = vec_sel(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
+! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
+! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
+! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<2:ui64>) -> vector<2xi64>
+! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
+! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
+! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<2xi64> to vector<16xi8>
+! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<2xi64> to vector<16xi8>
+! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<2xi64> to vector<16xi8>
+! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
+! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
+! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
+! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
+! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<2xi64>
+! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<2xi64>) -> !fir.vector<2:i64>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
+! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
+! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<2xi64> to vector<16xi8>
+! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<2xi64> to vector<16xi8>
+! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<2xi64> to vector<16xi8>
+! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3]], %[[c]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]] : vector<16xi8>
+! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]] : vector<16xi8>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<2xi64>
+
+! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! CHECK: %[[arg3:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! CHECK: %[[bc1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
+! CHECK: %[[bc2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
+! CHECK: %[[bc3:.*]] = bitcast <2 x i64> %[[arg3]] to <16 x i8>
+! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
+! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
+! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
+! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <2 x i64>
+end subroutine vec_sel_testi8
+
+! CHECK-LABEL: vec_sel_testu1
+subroutine vec_sel_testu1(arg1, arg2, arg3)
+ vector(unsigned(1)) :: arg1, arg2, r
+ vector(unsigned(1)) :: arg3
+ r = vec_sel(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
+! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
+! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
+! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8>
+! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
+! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
+! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<16xi8>
+! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<16xi8>
+! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<16xi8> to vector<16xi8>
+! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
+! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
+! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
+! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
+! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<16xi8>
+! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
+! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
+! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[c]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[arg1]], %[[xor]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[arg2]], %[[arg3]] : vector<16xi8>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.or %[[and1]], %[[and2]] : vector<16xi8>
+
+! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! CHECK: %[[comp:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! CHECK: %[[and1:.*]] = and <16 x i8> %[[arg1]], %[[comp]]
+! CHECK: %[[and2:.*]] = and <16 x i8> %[[arg2]], %[[arg3]]
+! CHECK: %{{[0-9]+}} = or <16 x i8> %[[and1]], %[[and2]]
+end subroutine vec_sel_testu1
+
+! CHECK-LABEL: vec_sel_testu2
+subroutine vec_sel_testu2(arg1, arg2, arg3)
+ vector(unsigned(2)) :: arg1, arg2, r
+ vector(unsigned(2)) :: arg3
+ r = vec_sel(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
+! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
+! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
+! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<8:ui16>) -> vector<8xi16>
+! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
+! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
+! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<16xi8>
+! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<16xi8>
+! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<8xi16> to vector<16xi8>
+! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
+! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
+! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
+! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
+! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<8xi16>
+! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<8xi16>) -> !fir.vector<8:ui16>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
+! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<16xi8>
+! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<16xi8>
+! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<8xi16> to vector<16xi8>
+! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3]], %[[c]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]] : vector<16xi8>
+! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]] : vector<16xi8>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<8xi16>
+
+! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! CHECK: %[[arg3:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! CHECK: %[[bc1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! CHECK: %[[bc2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! CHECK: %[[bc3:.*]] = bitcast <8 x i16> %[[arg3]] to <16 x i8>
+! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
+! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
+! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
+! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <8 x i16>
+end subroutine vec_sel_testu2
+
+! CHECK-LABEL: vec_sel_testu4
+subroutine vec_sel_testu4(arg1, arg2, arg3)
+ vector(unsigned(4)) :: arg1, arg2, r
+ vector(unsigned(4)) :: arg3
+ r = vec_sel(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
+! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
+! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
+! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<4:ui32>) -> vector<4xi32>
+! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
+! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
+! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<4xi32> to vector<16xi8>
+! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<4xi32> to vector<16xi8>
+! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<4xi32> to vector<16xi8>
+! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
+! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
+! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
+! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
+! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<4xi32>
+! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<4xi32>) -> !fir.vector<4:ui32>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
+! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<4xi32> to vector<16xi8>
+! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<4xi32> to vector<16xi8>
+! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<4xi32> to vector<16xi8>
+! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3]], %[[c]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]] : vector<16xi8>
+! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]] : vector<16xi8>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<4xi32>
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %[[arg3:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %[[bc1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! CHECK: %[[bc2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! CHECK: %[[bc3:.*]] = bitcast <4 x i32> %[[arg3]] to <16 x i8>
+! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
+! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
+! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
+! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <4 x i32>
+end subroutine vec_sel_testu4
+
+! CHECK-LABEL: vec_sel_testu8
+subroutine vec_sel_testu8(arg1, arg2, arg3)
+ vector(unsigned(8)) :: arg1, arg2, r
+ vector(unsigned(8)) :: arg3
+ r = vec_sel(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
+! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
+! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
+! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<2:ui64>) -> vector<2xi64>
+! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
+! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
+! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<2xi64> to vector<16xi8>
+! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<2xi64> to vector<16xi8>
+! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<2xi64> to vector<16xi8>
+! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
+! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
+! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
+! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
+! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<2xi64>
+! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<2xi64>) -> !fir.vector<2:ui64>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
+! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
+! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<2xi64> to vector<16xi8>
+! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<2xi64> to vector<16xi8>
+! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<2xi64> to vector<16xi8>
+! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3]], %[[c]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]] : vector<16xi8>
+! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]] : vector<16xi8>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<2xi64>
+
+! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! CHECK: %[[arg3:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! CHECK: %[[bc1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
+! CHECK: %[[bc2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
+! CHECK: %[[bc3:.*]] = bitcast <2 x i64> %[[arg3]] to <16 x i8>
+! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
+! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
+! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
+! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <2 x i64>
+end subroutine vec_sel_testu8
+
+! CHECK-LABEL: vec_sel_testr4
+subroutine vec_sel_testr4(arg1, arg2, arg3)
+ vector(real(4)) :: arg1, arg2, r
+ vector(unsigned(4)) :: arg3
+ r = vec_sel(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<4:ui32>) -> vector<4xi32>
+! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
+! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
+! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<4xf32> to vector<16xi8>
+! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<4xf32> to vector<16xi8>
+! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<4xi32> to vector<16xi8>
+! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
+! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
+! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
+! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
+! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<4xf32>
+! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<4xf32>) -> !fir.vector<4:f32>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
+! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<16xi8>
+! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<4xf32> to vector<16xi8>
+! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<4xi32> to vector<16xi8>
+! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3]], %[[c]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]] : vector<16xi8>
+! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]] : vector<16xi8>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<4xf32>
+
+! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! CHECK: %[[arg3:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %[[bc1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
+! CHECK: %[[bc2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
+! CHECK: %[[bc3:.*]] = bitcast <4 x i32> %[[arg3]] to <16 x i8>
+! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
+! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
+! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
+! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <4 x float>
+end subroutine vec_sel_testr4
+
+! CHECK-LABEL: vec_sel_testr8
+subroutine vec_sel_testr8(arg1, arg2, arg3)
+ vector(real(8)) :: arg1, arg2, r
+ vector(unsigned(8)) :: arg3
+ r = vec_sel(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
+! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
+! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
+! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<2:ui64>) -> vector<2xi64>
+! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
+! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
+! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<2xf64> to vector<16xi8>
+! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<2xf64> to vector<16xi8>
+! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<2xi64> to vector<16xi8>
+! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
+! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
+! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
+! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
+! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<2xf64>
+! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<2xf64>) -> !fir.vector<2:f64>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
+! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
+! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<2xf64> to vector<16xi8>
+! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<2xf64> to vector<16xi8>
+! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<2xi64> to vector<16xi8>
+! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3]], %[[c]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]] : vector<16xi8>
+! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]] : vector<16xi8>
+! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]] : vector<16xi8>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<2xf64>
+
+! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! CHECK: %[[arg3:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! CHECK: %[[bc1:.*]] = bitcast <2 x double> %[[arg1]] to <16 x i8>
+! CHECK: %[[bc2:.*]] = bitcast <2 x double> %[[arg2]] to <16 x i8>
+! CHECK: %[[bc3:.*]] = bitcast <2 x i64> %[[arg3]] to <16 x i8>
+! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
+! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
+! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
+! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <2 x double>
+end subroutine vec_sel_testr8