From ce48f43f054f396fec50287cf8c7624bfaa5842a Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Fri, 19 Apr 2024 12:38:53 -0700
Subject: [SelectionDAG] Require UADDO_CARRY carryin and carryout to have the
 same type. (#89255)

This requires type legalization to keep them the same. This means we no
longer need to legalize the operand since it will be legalized when we
legalize the second result.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp        |  4 ++--
 .../CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp    | 20 +-------------------
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h        |  1 -
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp       |  2 +-
 4 files changed, 4 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a21b9ac..3cdb801 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3359,7 +3359,7 @@ SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
 }
 
 /**
- * If we are facing some sort of diamond carry propapagtion pattern try to
+ * If we are facing some sort of diamond carry propagation pattern try to
  * break it up to generate something like:
  *   (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
  *
@@ -3400,7 +3400,7 @@ static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
     Z = Carry0.getOperand(2);
   } else if (Carry0.getOpcode() == ISD::UADDO &&
              isOneConstant(Carry0.getOperand(1))) {
-    EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
+    EVT VT = Carry0->getValueType(1);
     Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
   } else {
     // We couldn't find a suitable Z.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 93ce9c2..55f9737 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -949,7 +949,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
   unsigned NumOps = N->getNumOperands();
   assert(NumOps <= 3 && "Too many operands");
   if (NumOps == 3)
-    Ops[2] = N->getOperand(2);
+    Ops[2] = PromoteTargetBoolean(N->getOperand(2), VT);
 
   SDLoc dl(N);
   SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, SVT),
@@ -1867,11 +1867,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
   case ISD::FSHL:
   case ISD::FSHR: Res = PromoteIntOp_FunnelShift(N); break;
 
-  case ISD::SADDO_CARRY:
-  case ISD::SSUBO_CARRY:
-  case ISD::UADDO_CARRY:
-  case ISD::USUBO_CARRY: Res = PromoteIntOp_ADDSUBO_CARRY(N, OpNo); break;
-
   case ISD::FRAMEADDR:
   case ISD::RETURNADDR: Res = PromoteIntOp_FRAMERETURNADDR(N); break;
 
@@ -2373,19 +2368,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VP_ZERO_EXTEND(SDNode *N) {
                      N->getOperand(1), N->getOperand(2));
 }
 
-SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBO_CARRY(SDNode *N, unsigned OpNo) {
-  assert(OpNo == 2 && "Don't know how to promote this operand!");
-
-  SDValue LHS = N->getOperand(0);
-  SDValue RHS = N->getOperand(1);
-  SDValue Carry = N->getOperand(2);
-  SDLoc DL(N);
-
-  Carry = PromoteTargetBoolean(Carry, LHS.getValueType());
-
-  return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0);
-}
-
 SDValue DAGTypeLegalizer::PromoteIntOp_FIX(SDNode *N) {
   SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
   return SDValue(
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 919c0d4..0483f7c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -389,7 +389,6 @@ private:
   SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
   SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
   SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
-  SDValue PromoteIntOp_ADDSUBO_CARRY(SDNode *N, unsigned OpNo);
   SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
   SDValue PromoteIntOp_FIX(SDNode *N);
   SDValue PromoteIntOp_ExpOp(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f3441c9..7dbf83b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -9924,7 +9924,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
     assert(VTList.VTs[0].isInteger() && VTList.VTs[1].isInteger() &&
            Ops[0].getValueType() == Ops[1].getValueType() &&
            Ops[0].getValueType() == VTList.VTs[0] &&
-           Ops[2].getValueType().isInteger() &&
+           Ops[2].getValueType() == VTList.VTs[1] &&
            "Binary operator types must match!");
     break;
   case ISD::SMUL_LOHI:
-- 
cgit v1.1


From 016ce9ed5cd3694cbff72a768a593714913822ea Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Fri, 19 Apr 2024 12:39:32 -0700
Subject: [RISCV] Rename FeatureRVE to FeatureStdExtE. NFC (#89174)

Planning to declare all extensions in tablegen so we can generate the
tables for RISCVISAInfo.cpp. This requires making "e" consistent with
other extensions.
---
 llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp       |  2 +-
 llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp |  2 +-
 llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp     |  2 +-
 llvm/lib/Target/RISCV/RISCVFeatures.td                   |  8 ++++----
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp              |  4 ++--
 llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp              | 12 ++++++------
 6 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 455a902..d926ccd 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -83,7 +83,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
 
   SMLoc getLoc() const { return getParser().getTok().getLoc(); }
   bool isRV64() const { return getSTI().hasFeature(RISCV::Feature64Bit); }
-  bool isRVE() const { return getSTI().hasFeature(RISCV::FeatureRVE); }
+  bool isRVE() const { return getSTI().hasFeature(RISCV::FeatureStdExtE); }
 
   RISCVTargetStreamer &getTargetStreamer() {
     assert(getParser().getStreamer().getTargetStreamer() &&
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 6aadabd..998b918 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -64,7 +64,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVDisassembler() {
 static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint32_t RegNo,
                                            uint64_t Address,
                                            const MCDisassembler *Decoder) {
-  bool IsRVE = Decoder->getSubtargetInfo().hasFeature(RISCV::FeatureRVE);
+  bool IsRVE = Decoder->getSubtargetInfo().hasFeature(RISCV::FeatureStdExtE);
 
   if (RegNo >= 32 || (IsRVE && RegNo >= 16))
     return MCDisassembler::Fail;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
index 5d9a58b..67c9060 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
@@ -40,7 +40,7 @@ ABI computeTargetABI(const Triple &TT, const FeatureBitset &FeatureBits,
                      StringRef ABIName) {
   auto TargetABI = getTargetABI(ABIName);
   bool IsRV64 = TT.isArch64Bit();
-  bool IsRVE = FeatureBits[RISCV::FeatureRVE];
+  bool IsRVE = FeatureBits[RISCV::FeatureStdExtE];
 
   if (!ABIName.empty() && TargetABI == ABI_Unknown) {
     errs()
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 339c039..116e5a2 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -16,6 +16,10 @@ def FeatureStdExtI
     : SubtargetFeature<"i", "HasStdExtI", "true",
                        "'I' (Base Integer Instruction Set)">;
 
+def FeatureStdExtE
+    : SubtargetFeature<"e", "HasStdExtE", "true",
+                       "Implements RV{32,64}E (provides 16 rather than 32 GPRs)">;
+
 def FeatureStdExtZic64b
     : SubtargetFeature<"zic64b", "HasStdExtZic64b", "true",
                        "'Zic64b' (Cache Block Size Is 64 Bytes)">;
@@ -1162,10 +1166,6 @@ def IsRV32 : Predicate<"!Subtarget->is64Bit()">,
 defvar RV32 = DefaultMode;
 def RV64           : HwMode<"+64bit", [IsRV64]>;
 
-def FeatureRVE
-    : SubtargetFeature<"e", "IsRVE", "true",
-                       "Implements RV{32,64}E (provides 16 rather than 32 GPRs)">;
-
 def FeatureRelax
     : SubtargetFeature<"relax", "EnableLinkerRelax", "true",
                        "Enable Linker relaxation.">;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b0deb1d..82339dd 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -18951,7 +18951,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
   case CallingConv::RISCV_VectorCall:
     break;
   case CallingConv::GHC:
-    if (Subtarget.isRVE())
+    if (Subtarget.hasStdExtE())
       report_fatal_error("GHC calling convention is not supported on RVE!");
     if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
       report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
@@ -19189,7 +19189,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
 
   if (CallConv == CallingConv::GHC) {
-    if (Subtarget.isRVE())
+    if (Subtarget.hasStdExtE())
       report_fatal_error("GHC calling convention is not supported on RVE!");
     ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
   } else
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 367a62e..6a48848 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -65,10 +65,10 @@ RISCVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     if (Subtarget.hasStdExtD())
       return CSR_XLEN_F64_Interrupt_SaveList;
     if (Subtarget.hasStdExtF())
-      return Subtarget.isRVE() ? CSR_XLEN_F32_Interrupt_RVE_SaveList
-                               : CSR_XLEN_F32_Interrupt_SaveList;
-    return Subtarget.isRVE() ? CSR_Interrupt_RVE_SaveList
-                             : CSR_Interrupt_SaveList;
+      return Subtarget.hasStdExtE() ? CSR_XLEN_F32_Interrupt_RVE_SaveList
+                                    : CSR_XLEN_F32_Interrupt_SaveList;
+    return Subtarget.hasStdExtE() ? CSR_Interrupt_RVE_SaveList
+                                  : CSR_Interrupt_SaveList;
   }
 
   bool HasVectorCSR =
@@ -126,7 +126,7 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   markSuperRegs(Reserved, RISCV::DUMMY_REG_PAIR_WITH_X0);
 
   // There are only 16 GPRs for RVE.
-  if (Subtarget.isRVE())
+  if (Subtarget.hasStdExtE())
     for (MCPhysReg Reg = RISCV::X16; Reg <= RISCV::X31; Reg++)
       markSuperRegs(Reserved, Reg);
 
@@ -145,7 +145,7 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   markSuperRegs(Reserved, RISCV::VCIX_STATE);
 
   if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
-    if (Subtarget.isRVE())
+    if (Subtarget.hasStdExtE())
       report_fatal_error("Graal reserved registers do not exist in RVE");
     markSuperRegs(Reserved, RISCV::X23);
     markSuperRegs(Reserved, RISCV::X27);
-- 
cgit v1.1


From 8e2060bf210e83d6cc34f61185918ca67b54f6f1 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Fri, 19 Apr 2024 12:39:54 -0700
Subject: [X86][TableGen] Remove unnecessary use of formatted_raw_ostream. NFC
 (#89343)

This code used to use the PadToColumn feature of formatted_raw_ostream,
but no longer does. formatted_raw_ostream is slower than regular
raw_ostream because it has to keep track of the number of character
since the last new line character.
---
 llvm/utils/TableGen/X86FoldTablesEmitter.cpp | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index 0e36747..1440863 100644
--- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -15,7 +15,6 @@
 #include "Common/CodeGenTarget.h"
 #include "X86RecognizableInstr.h"
 #include "llvm/ADT/StringSwitch.h"
-#include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/X86FoldTablesUtils.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
@@ -95,7 +94,7 @@ class X86FoldTablesEmitter {
                       const CodeGenInstruction *MemInst)
         : RegInst(RegInst), MemInst(MemInst) {}
 
-    void print(formatted_raw_ostream &OS) const {
+    void print(raw_ostream &OS) const {
       OS.indent(2);
       OS << "{X86::" << RegInst->TheDef->getName() << ", ";
       OS << "X86::" << MemInst->TheDef->getName() << ", ";
@@ -222,7 +221,7 @@ private:
   // Print the given table as a static const C++ array of type
   // X86FoldTableEntry.
   void printTable(const FoldTable &Table, StringRef TableName,
-                  formatted_raw_ostream &OS) {
+                  raw_ostream &OS) {
     OS << "static const X86FoldTableEntry " << TableName << "[] = {\n";
 
     for (auto &E : Table)
@@ -619,9 +618,7 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInst,
   }
 }
 
-void X86FoldTablesEmitter::run(raw_ostream &O) {
-  formatted_raw_ostream OS(O);
-
+void X86FoldTablesEmitter::run(raw_ostream &OS) {
   // Holds all memory instructions
   std::vector<const CodeGenInstruction *> MemInsts;
   // Holds all register instructions - divided according to opcode.
-- 
cgit v1.1


From 3ea5dff0efdbb4bf5590d882810aa42f9ec26e4e Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda@apple.com>
Date: Fri, 19 Apr 2024 12:45:40 -0700
Subject: [clang][modules] Only avoid pruning module maps when asked to
 (#89428)

Pruning non-affecting module maps is useful even when passing module
maps explicitly via `-fmodule-map-file=<path>`. For this situation, this
patch reinstates the behavior we had prior to #87849. For the situation
where the explicit module map file arguments were generated by the
dependency scanner (which already pruned the non-affecting ones), this
patch introduces new `-cc1` flag
`-fno-modules-prune-non-affecting-module-map-files` that avoids the
extra work.
---
 clang/include/clang/Driver/Options.td              |  5 ++
 clang/include/clang/Lex/HeaderSearchOptions.h      |  7 ++-
 clang/lib/Serialization/ASTWriter.cpp              |  6 +--
 .../DependencyScanning/ModuleDepCollector.cpp      |  5 ++
 clang/test/ClangScanDeps/modules-full.cpp          |  3 ++
 .../Modules/add-remove-irrelevant-module-map.m     | 32 -----------
 .../Modules/prune-non-affecting-module-map-files.m | 62 ++++++++++++++++++++++
 7 files changed, 84 insertions(+), 36 deletions(-)
 delete mode 100644 clang/test/Modules/add-remove-irrelevant-module-map.m
 create mode 100644 clang/test/Modules/prune-non-affecting-module-map-files.m

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 46225e2..52d1617 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3100,6 +3100,11 @@ defm modules_skip_header_search_paths : BoolFOption<"modules-skip-header-search-
     HeaderSearchOpts<"ModulesSkipHeaderSearchPaths">, DefaultFalse,
     PosFlag<SetTrue, [], [], "Disable writing header search paths">,
     NegFlag<SetFalse>, BothFlags<[], [CC1Option]>>;
+def fno_modules_prune_non_affecting_module_map_files :
+    Flag<["-"], "fno-modules-prune-non-affecting-module-map-files">,
+    Group<f_Group>, Flags<[]>, Visibility<[CC1Option]>,
+    MarshallingInfoNegativeFlag<HeaderSearchOpts<"ModulesPruneNonAffectingModuleMaps">>,
+    HelpText<"Do not prune non-affecting module map files when writing module files">;
 
 def fincremental_extensions :
   Flag<["-"], "fincremental-extensions">,
diff --git a/clang/include/clang/Lex/HeaderSearchOptions.h b/clang/include/clang/Lex/HeaderSearchOptions.h
index 637dc77..e4437ac 100644
--- a/clang/include/clang/Lex/HeaderSearchOptions.h
+++ b/clang/include/clang/Lex/HeaderSearchOptions.h
@@ -252,6 +252,10 @@ public:
   LLVM_PREFERRED_TYPE(bool)
   unsigned ModulesSkipPragmaDiagnosticMappings : 1;
 
+  /// Whether to prune non-affecting module map files from PCM files.
+  LLVM_PREFERRED_TYPE(bool)
+  unsigned ModulesPruneNonAffectingModuleMaps : 1;
+
   LLVM_PREFERRED_TYPE(bool)
   unsigned ModulesHashContent : 1;
 
@@ -280,7 +284,8 @@ public:
         ModulesValidateDiagnosticOptions(true),
         ModulesSkipDiagnosticOptions(false),
         ModulesSkipHeaderSearchPaths(false),
-        ModulesSkipPragmaDiagnosticMappings(false), ModulesHashContent(false),
+        ModulesSkipPragmaDiagnosticMappings(false),
+        ModulesPruneNonAffectingModuleMaps(true), ModulesHashContent(false),
         ModulesStrictContextHash(false), ModulesIncludeVFSUsage(false) {}
 
   /// AddPath - Add the \p Path path to the specified \p Group list.
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 6dd87b5..8a4b362 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -166,9 +166,9 @@ namespace {
 
 std::optional<std::set<const FileEntry *>>
 GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
-  // Without implicit module map search, there's no good reason to know about
-  // any module maps that are not affecting.
-  if (!PP.getHeaderSearchInfo().getHeaderSearchOpts().ImplicitModuleMaps)
+  if (!PP.getHeaderSearchInfo()
+           .getHeaderSearchOpts()
+           .ModulesPruneNonAffectingModuleMaps)
     return std::nullopt;
 
   SmallVector<const Module *> ModulesToProcess{RootModule};
diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index e19f19b2..f46324e 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -179,6 +179,11 @@ makeCommonInvocationForModuleBuild(CompilerInvocation CI) {
   CI.resetNonModularOptions();
   CI.clearImplicitModuleBuildOptions();
 
+  // The scanner takes care to avoid passing non-affecting module maps to the
+  // explicit compiles. No need to do extra work just to find out there are no
+  // module map files to prune.
+  CI.getHeaderSearchOpts().ModulesPruneNonAffectingModuleMaps = false;
+
   // Remove options incompatible with explicit module build or are likely to
   // differ between identical modules discovered from different translation
   // units.
diff --git a/clang/test/ClangScanDeps/modules-full.cpp b/clang/test/ClangScanDeps/modules-full.cpp
index 59efef0..a00a431 100644
--- a/clang/test/ClangScanDeps/modules-full.cpp
+++ b/clang/test/ClangScanDeps/modules-full.cpp
@@ -33,6 +33,7 @@
 // CHECK-NEXT:       "command-line": [
 // CHECK-NEXT:         "-cc1"
 // CHECK:              "-emit-module"
+// CHECK:              "-fno-modules-prune-non-affecting-module-map-files"
 // CHECK:              "-fmodule-file={{.*}}[[PREFIX]]/module-cache{{(_clangcl)?}}/[[HASH_H2_DINCLUDE]]/header2-{{[A-Z0-9]+}}.pcm"
 // CHECK-NOT:          "-fimplicit-module-maps"
 // CHECK:              "-fmodule-name=header1"
@@ -51,6 +52,7 @@
 // CHECK-NEXT:       "command-line": [
 // CHECK-NEXT:         "-cc1",
 // CHECK:              "-emit-module",
+// CHECK:              "-fno-modules-prune-non-affecting-module-map-files"
 // CHECK-NOT:          "-fimplicit-module-maps",
 // CHECK:              "-fmodule-name=header1",
 // CHECK:              "-fno-implicit-modules",
@@ -68,6 +70,7 @@
 // CHECK-NEXT:       "command-line": [
 // CHECK-NEXT:         "-cc1",
 // CHECK:              "-emit-module",
+// CHECK:              "-fno-modules-prune-non-affecting-module-map-files"
 // CHECK:              "-fmodule-name=header2",
 // CHECK-NOT:          "-fimplicit-module-maps",
 // CHECK:              "-fno-implicit-modules",
diff --git a/clang/test/Modules/add-remove-irrelevant-module-map.m b/clang/test/Modules/add-remove-irrelevant-module-map.m
deleted file mode 100644
index 7e3e580..0000000
--- a/clang/test/Modules/add-remove-irrelevant-module-map.m
+++ /dev/null
@@ -1,32 +0,0 @@
-// RUN: rm -rf %t && mkdir %t
-// RUN: split-file %s %t
-
-//--- a/module.modulemap
-module a {}
-
-//--- b/module.modulemap
-module b {}
-
-//--- c/module.modulemap
-module c {}
-
-//--- module.modulemap
-module m { header "m.h" }
-//--- m.h
-@import c;
-
-//--- test-simple.m
-// expected-no-diagnostics
-@import m;
-
-// Build modules with the non-affecting "a/module.modulemap".
-// RUN: %clang_cc1 -I %t/a -I %t/b -I %t/c -I %t -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/cache -fdisable-module-hash %t/test-simple.m -verify
-// RUN: mv %t/cache %t/cache-with
-
-// Build modules without the non-affecting "a/module.modulemap".
-// RUN: rm -rf %t/a/module.modulemap
-// RUN: %clang_cc1 -I %t/a -I %t/b -I %t/c -I %t -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/cache -fdisable-module-hash %t/test-simple.m -verify
-// RUN: mv %t/cache %t/cache-without
-
-// Check that the PCM files are bit-for-bit identical.
-// RUN: diff %t/cache-with/m.pcm %t/cache-without/m.pcm
diff --git a/clang/test/Modules/prune-non-affecting-module-map-files.m b/clang/test/Modules/prune-non-affecting-module-map-files.m
new file mode 100644
index 0000000..ba2b3a3
--- /dev/null
+++ b/clang/test/Modules/prune-non-affecting-module-map-files.m
@@ -0,0 +1,62 @@
+// Check that the presence of non-affecting module map files does not affect the
+// contents of PCM files.
+
+// RUN: rm -rf %t && mkdir %t
+// RUN: split-file %s %t
+
+//--- a/module.modulemap
+module a {}
+
+//--- b/module.modulemap
+module b {}
+
+//--- c/module.modulemap
+module c { header "c.h" }
+//--- c/c.h
+@import b;
+
+//--- tu.m
+@import c;
+
+//--- explicit-mms-common-args.rsp
+-fmodule-map-file=b/module.modulemap -fmodule-map-file=c/module.modulemap -fmodules -fmodules-cache-path=cache -fdisable-module-hash -fsyntax-only tu.m
+//--- implicit-search-args.rsp
+-I a -I b -I c -fimplicit-module-maps -fmodules -fmodules-cache-path=cache -fdisable-module-hash -fsyntax-only tu.m
+//--- implicit-search-args.rsp-end
+
+// Test with explicit module map files.
+//
+// RUN: %clang_cc1 -working-directory %t @%t/explicit-mms-common-args.rsp
+// RUN: mv %t/cache %t/cache-explicit-no-a-prune
+// RUN: %clang_cc1 -working-directory %t @%t/explicit-mms-common-args.rsp -fno-modules-prune-non-affecting-module-map-files
+// RUN: mv %t/cache %t/cache-explicit-no-a-keep
+//
+// RUN: %clang_cc1 -working-directory %t -fmodule-map-file=a/module.modulemap @%t/explicit-mms-common-args.rsp
+// RUN: mv %t/cache %t/cache-explicit-a-prune
+// RUN: %clang_cc1 -working-directory %t -fmodule-map-file=a/module.modulemap @%t/explicit-mms-common-args.rsp -fno-modules-prune-non-affecting-module-map-files
+// RUN: mv %t/cache %t/cache-explicit-a-keep
+//
+// RUN: diff %t/cache-explicit-no-a-prune/c.pcm %t/cache-explicit-a-prune/c.pcm
+// RUN: not diff %t/cache-explicit-no-a-keep/c.pcm %t/cache-explicit-a-keep/c.pcm
+
+// Test with implicit module map search.
+//
+// RUN: %clang_cc1 -working-directory %t @%t/implicit-search-args.rsp
+// RUN: mv %t/cache %t/cache-implicit-no-a-prune
+// RUN: %clang_cc1 -working-directory %t @%t/implicit-search-args.rsp -fno-modules-prune-non-affecting-module-map-files
+// RUN: mv %t/cache %t/cache-implicit-no-a-keep
+//
+// FIXME: Instead of removing "a/module.modulemap" from the file system, we
+//        could drop the "-I a" search path argument in combination with the
+//        "-fmodules-skip-header-search-paths" flag. Unfortunately, that flag
+//        does not prevent serialization of the search path usage bit vector,
+//        making the files differ anyways.
+// RUN: rm %t/a/module.modulemap
+//
+// RUN: %clang_cc1 -working-directory %t @%t/implicit-search-args.rsp
+// RUN: mv %t/cache %t/cache-implicit-a-prune
+// RUN: %clang_cc1 -working-directory %t @%t/implicit-search-args.rsp -fno-modules-prune-non-affecting-module-map-files
+// RUN: mv %t/cache %t/cache-implicit-a-keep
+//
+// RUN: diff %t/cache-implicit-no-a-prune/c.pcm %t/cache-implicit-a-prune/c.pcm
+// RUN: not diff %t/cache-implicit-no-a-keep/c.pcm %t/cache-implicit-a-keep/c.pcm
-- 
cgit v1.1


From aa7c104124ac4a8da12fbd25c797bc8ab438c545 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?=
 =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?=
 =?UTF-8?q?=E3=83=B3=29?= <clementval@gmail.com>
Date: Fri, 19 Apr 2024 12:47:40 -0700
Subject: [flang][cuda] Allow fixed size array with the managed attribute
 (#89436)

Fixed size array and scalar should be allowed with the `managed`
attribute.
---
 flang/lib/Semantics/check-declarations.cpp |  4 ++--
 flang/test/Semantics/cuf03.cuf             | 11 ++++-------
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp
index 875929e..adbd21d 100644
--- a/flang/lib/Semantics/check-declarations.cpp
+++ b/flang/lib/Semantics/check-declarations.cpp
@@ -956,9 +956,9 @@ void CheckHelper::CheckObjectEntity(
       break;
     case common::CUDADataAttr::Managed:
       if (!IsAutomatic(symbol) && !IsAllocatable(symbol) &&
-          !details.isDummy()) {
+          !details.isDummy() && !evaluate::IsExplicitShape(symbol)) {
         messages_.Say(
-            "Object '%s' with ATTRIBUTES(MANAGED) must also be allocatable, automatic, or a dummy argument"_err_en_US,
+            "Object '%s' with ATTRIBUTES(MANAGED) must also be allocatable, automatic, explicit shape, or a dummy argument"_err_en_US,
             symbol.name());
       }
       break;
diff --git a/flang/test/Semantics/cuf03.cuf b/flang/test/Semantics/cuf03.cuf
index 8decb8d..020a172 100644
--- a/flang/test/Semantics/cuf03.cuf
+++ b/flang/test/Semantics/cuf03.cuf
@@ -32,14 +32,11 @@ module m
   real, shared, target :: mst
   !ERROR: Object 'msa' with ATTRIBUTES(SHARED) must be declared in a device subprogram
   real, shared :: msa(*)
-  !ERROR: Object 'mm' with ATTRIBUTES(MANAGED) must also be allocatable, automatic, or a dummy argument
-  real, managed :: mm
-  !ERROR: Object 'mmi' with ATTRIBUTES(MANAGED) must also be allocatable, automatic, or a dummy argument
-  real, managed :: mmi = 1.
+  real, managed :: mm ! ok
+  real, managed :: mmi = 1. ! ok
   real, managed, allocatable :: mml ! ok
-  !ERROR: Object 'mmp' with ATTRIBUTES(MANAGED) must also be allocatable, automatic, or a dummy argument
-  real, managed, pointer :: mmp ! ok
-  !ERROR: Object 'mmt' with ATTRIBUTES(MANAGED) must also be allocatable, automatic, or a dummy argument
+  !ERROR: Object 'mmp' with ATTRIBUTES(MANAGED) must also be allocatable, automatic, explicit shape, or a dummy argument
+  real, managed, pointer :: mmp(:)
   real, managed, target :: mmt
   !WARNING: Object 'mp' with ATTRIBUTES(PINNED) should also be allocatable
   real, pinned :: mp
-- 
cgit v1.1


From c32712d1763d74329b42c1cd68a24d4c0075b596 Mon Sep 17 00:00:00 2001
From: Bill Wendling <5993918+bwendling@users.noreply.github.com>
Date: Fri, 19 Apr 2024 12:48:33 -0700
Subject: [Clang] Handle structs with inner structs and no fields (#89126)

A struct that declares an inner struct, but no fields, won't have a
field count. So getting the offset of the inner struct fails. This
happens in both C and C++:

  struct foo {
    struct bar {
      int Quantizermatrix[];
    };
  };

Here 'struct foo' has no fields.

Closes: https://github.com/llvm/llvm-project/issues/88931
---
 clang/lib/CodeGen/CGBuiltin.cpp                | 25 +++++++++-------
 clang/test/CodeGen/attr-counted-by-pr88931.c   | 40 ++++++++++++++++++++++++++
 clang/test/CodeGen/attr-counted-by-pr88931.cpp | 21 ++++++++++++++
 3 files changed, 75 insertions(+), 11 deletions(-)
 create mode 100644 clang/test/CodeGen/attr-counted-by-pr88931.c
 create mode 100644 clang/test/CodeGen/attr-counted-by-pr88931.cpp

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a05874e..4319501 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -826,29 +826,32 @@ const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberField(
     ASTContext &Ctx, const RecordDecl *RD, StringRef Name, uint64_t &Offset) {
   const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
       getLangOpts().getStrictFlexArraysLevel();
-  unsigned FieldNo = 0;
-  bool IsUnion = RD->isUnion();
+  uint32_t FieldNo = 0;
 
-  for (const Decl *D : RD->decls()) {
-    if (const auto *Field = dyn_cast<FieldDecl>(D);
-        Field && (Name.empty() || Field->getNameAsString() == Name) &&
+  if (RD->isImplicit())
+    return nullptr;
+
+  for (const FieldDecl *FD : RD->fields()) {
+    if ((Name.empty() || FD->getNameAsString() == Name) &&
         Decl::isFlexibleArrayMemberLike(
-            Ctx, Field, Field->getType(), StrictFlexArraysLevel,
+            Ctx, FD, FD->getType(), StrictFlexArraysLevel,
             /*IgnoreTemplateOrMacroSubstitution=*/true)) {
       const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
       Offset += Layout.getFieldOffset(FieldNo);
-      return Field;
+      return FD;
     }
 
-    if (const auto *Record = dyn_cast<RecordDecl>(D))
-      if (const FieldDecl *Field =
-              FindFlexibleArrayMemberField(Ctx, Record, Name, Offset)) {
+    QualType Ty = FD->getType();
+    if (Ty->isRecordType()) {
+      if (const FieldDecl *Field = FindFlexibleArrayMemberField(
+              Ctx, Ty->getAsRecordDecl(), Name, Offset)) {
         const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
         Offset += Layout.getFieldOffset(FieldNo);
         return Field;
       }
+    }
 
-    if (!IsUnion && isa<FieldDecl>(D))
+    if (!RD->isUnion())
       ++FieldNo;
   }
 
diff --git a/clang/test/CodeGen/attr-counted-by-pr88931.c b/clang/test/CodeGen/attr-counted-by-pr88931.c
new file mode 100644
index 0000000..cc3d751
--- /dev/null
+++ b/clang/test/CodeGen/attr-counted-by-pr88931.c
@@ -0,0 +1,40 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -Wno-missing-declarations -emit-llvm -o - %s | FileCheck %s
+
+struct foo {
+  int x,y,z;
+  struct bar {
+    int count;
+    int array[] __attribute__((counted_by(count)));
+  };
+};
+
+void init(void * __attribute__((pass_dynamic_object_size(0))));
+
+// CHECK-LABEL: define dso_local void @test1(
+// CHECK-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 4
+// CHECK-NEXT:    tail call void @init(ptr noundef nonnull [[ARRAY]], i64 noundef -1) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT:    ret void
+//
+void test1(struct bar *p) {
+  init(p->array);
+}
+
+struct mux {
+  int count;
+  int array[] __attribute__((counted_by(count)));
+};
+
+struct bux { struct mux x; };
+
+// CHECK-LABEL: define dso_local void @test2(
+// CHECK-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @init(ptr noundef [[P]], i64 noundef -1) #[[ATTR2]]
+// CHECK-NEXT:    ret void
+//
+void test2(struct bux *p) {
+  init(p);
+}
diff --git a/clang/test/CodeGen/attr-counted-by-pr88931.cpp b/clang/test/CodeGen/attr-counted-by-pr88931.cpp
new file mode 100644
index 0000000..2a8cc1d
--- /dev/null
+++ b/clang/test/CodeGen/attr-counted-by-pr88931.cpp
@@ -0,0 +1,21 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -Wall -emit-llvm -o - %s | FileCheck %s
+
+struct foo {
+  struct bar {
+    int array[];
+    bar();
+  };
+};
+
+void init(void * __attribute__((pass_dynamic_object_size(0))));
+
+// CHECK-LABEL: define dso_local void @_ZN3foo3barC1Ev(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @_Z4initPvU25pass_dynamic_object_size0(ptr noundef nonnull [[THIS]], i64 noundef -1) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT:    ret void
+//
+foo::bar::bar() {
+  init(array);
+}
-- 
cgit v1.1


From 0c455ee34823cb991a35e33ff020bb7cc4e44c8a Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin@yahoo.com>
Date: Fri, 19 Apr 2024 15:50:55 -0400
Subject: [flang][OpenMP] Use maxnum/minnum for lowering of max/min reduction
 operators (#89258)

This patch changes the lowering of max and min to be lowered to
arith::MaxNumFop and arith::MinNumFOp instead of using arith::MaximumFOp
and arith::MinimumFOp. The arith::MaximumFOp and arith::MinimumFOp map
to the corresponding intrinsics llvm.maximum.* and llvm.minimum.*
intrinsics which conform to the semantics specified in the draft of IEEE
754-2019, which is not supported by all hardware. Instead using
arith::MaximumFOp and arith::MinimumFOp will allow code generation for
more targets and match the code generated by clang OpenMP.

fixes #87955
---
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp              | 4 ++--
 flang/test/Lower/OpenMP/FIR/wsloop-reduction-max-byref.f90 | 2 +-
 flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90       | 2 +-
 flang/test/Lower/OpenMP/FIR/wsloop-reduction-min-byref.f90 | 2 +-
 flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90       | 2 +-
 flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90     | 2 +-
 flang/test/Lower/OpenMP/wsloop-reduction-max.f90           | 2 +-
 flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90     | 2 +-
 flang/test/Lower/OpenMP/wsloop-reduction-min.f90           | 2 +-
 9 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index 6a91ee4..9f8352a 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -220,12 +220,12 @@ mlir::Value ReductionProcessor::createScalarCombiner(
   switch (redId) {
   case ReductionIdentifier::MAX:
     reductionOp =
-        getReductionOperation<mlir::arith::MaximumFOp, mlir::arith::MaxSIOp>(
+        getReductionOperation<mlir::arith::MaxNumFOp, mlir::arith::MaxSIOp>(
             builder, type, loc, op1, op2);
     break;
   case ReductionIdentifier::MIN:
     reductionOp =
-        getReductionOperation<mlir::arith::MinimumFOp, mlir::arith::MinSIOp>(
+        getReductionOperation<mlir::arith::MinNumFOp, mlir::arith::MinSIOp>(
             builder, type, loc, op1, op2);
     break;
   case ReductionIdentifier::IOR:
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max-byref.f90
index f0979ab..80b720e 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max-byref.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max-byref.f90
@@ -11,7 +11,7 @@
 !CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
 !CHECK:   %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
 !CHECK:   %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
-!CHECK:   %[[RES:.*]] = arith.maximumf %[[LD0]], %[[LD1]] {{.*}}: f32
+!CHECK:   %[[RES:.*]] = arith.maxnumf %[[LD0]], %[[LD1]] {{.*}}: f32
 !CHECK:   fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
 !CHECK:   omp.yield(%[[ARG0]] : !fir.ref<f32>)
 
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90
index 996296c..c3b821e 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90
@@ -6,7 +6,7 @@
 !CHECK:   omp.yield(%[[MINIMUM_VAL_F]] : f32)
 !CHECK: combiner
 !CHECK: ^bb0(%[[ARG0_F:.*]]: f32, %[[ARG1_F:.*]]: f32):
-!CHECK:   %[[COMB_VAL_F:.*]] = arith.maximumf %[[ARG0_F]], %[[ARG1_F]] {{.*}}: f32
+!CHECK:   %[[COMB_VAL_F:.*]] = arith.maxnumf %[[ARG0_F]], %[[ARG1_F]] {{.*}}: f32
 !CHECK:   omp.yield(%[[COMB_VAL_F]] : f32)
 
 !CHECK: omp.declare_reduction @[[MAX_DECLARE_I:.*]] : i32 init {
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min-byref.f90
index 24aa8e4..b284f8e5 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min-byref.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min-byref.f90
@@ -11,7 +11,7 @@
 !CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
 !CHECK:   %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
 !CHECK:   %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
-!CHECK:   %[[RES:.*]] = arith.minimumf %[[LD0]], %[[LD1]] {{.*}}: f32
+!CHECK:   %[[RES:.*]] = arith.minnumf %[[LD0]], %[[LD1]] {{.*}}: f32
 !CHECK:   fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
 !CHECK:   omp.yield(%[[ARG0]] : !fir.ref<f32>)
 
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90
index 268f51c..ab33e18 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90
@@ -6,7 +6,7 @@
 !CHECK:   omp.yield(%[[MAXIMUM_VAL_F]] : f32)
 !CHECK: combiner
 !CHECK: ^bb0(%[[ARG0_F:.*]]: f32, %[[ARG1_F:.*]]: f32):
-!CHECK:   %[[COMB_VAL_F:.*]] = arith.minimumf %[[ARG0_F]], %[[ARG1_F]] {{.*}}: f32
+!CHECK:   %[[COMB_VAL_F:.*]] = arith.minnumf %[[ARG0_F]], %[[ARG1_F]] {{.*}}: f32
 !CHECK:   omp.yield(%[[COMB_VAL_F]] : f32)
 
 !CHECK: omp.declare_reduction @[[MIN_DECLARE_I:.*]] : i32 init {
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90
index ee562bb..2f6921e 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90
@@ -13,7 +13,7 @@
 !CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
 !CHECK:   %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
 !CHECK:   %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
-!CHECK:   %[[RES:.*]] = arith.maximumf %[[LD0]], %[[LD1]] {{.*}}: f32
+!CHECK:   %[[RES:.*]] = arith.maxnumf %[[LD0]], %[[LD1]] {{.*}}: f32
 !CHECK:   fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
 !CHECK:   omp.yield(%[[ARG0]] : !fir.ref<f32>)
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90
index 6f11f0e..c9cf5cb 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90
@@ -10,7 +10,7 @@
 
 ! CHECK-LABEL:   } combiner {
 ! CHECK:         ^bb0(%[[VAL_0:.*]]: f32, %[[VAL_1:.*]]: f32):
-! CHECK:           %[[VAL_2:.*]] = arith.maximumf %[[VAL_0]], %[[VAL_1]] fastmath<contract> : f32
+! CHECK:           %[[VAL_2:.*]] = arith.maxnumf %[[VAL_0]], %[[VAL_1]] fastmath<contract> : f32
 ! CHECK:           omp.yield(%[[VAL_2]] : f32)
 ! CHECK:         }
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90
index c037211..84a376b 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90
@@ -13,7 +13,7 @@
 !CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
 !CHECK:   %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
 !CHECK:   %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
-!CHECK:   %[[RES:.*]] = arith.minimumf %[[LD0]], %[[LD1]] {{.*}}: f32
+!CHECK:   %[[RES:.*]] = arith.minnumf %[[LD0]], %[[LD1]] {{.*}}: f32
 !CHECK:   fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
 !CHECK:   omp.yield(%[[ARG0]] : !fir.ref<f32>)
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90
index 2c694f8..3ba279a 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90
@@ -10,7 +10,7 @@
 
 ! CHECK-LABEL:   } combiner {
 ! CHECK:         ^bb0(%[[VAL_0:.*]]: f32, %[[VAL_1:.*]]: f32):
-! CHECK:           %[[VAL_2:.*]] = arith.minimumf %[[VAL_0]], %[[VAL_1]] fastmath<contract> : f32
+! CHECK:           %[[VAL_2:.*]] = arith.minnumf %[[VAL_0]], %[[VAL_1]] fastmath<contract> : f32
 ! CHECK:           omp.yield(%[[VAL_2]] : f32)
 ! CHECK:         }
 
-- 
cgit v1.1


From cee7d994b94db625177cdfebcb8a6ce1ed677f85 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@outlook.com>
Date: Fri, 19 Apr 2024 12:33:07 -0700
Subject: [SLP]Fix PR89438: Check for same vectorized node in MinBWs, not user.

Need to check if the buildvector node has perfect diamond match in the
graph and the matched node is resized.
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp    | 27 ++++++-
 .../SLPVectorizer/X86/gather-node-same-reduced.ll  | 84 ++++++++++++++++++++++
 2 files changed, 108 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-reduced.ll

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2e1788b..1b56bb7 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13139,9 +13139,30 @@ Value *BoUpSLP::vectorizeTree(
       assert(Vec->getType()->isIntOrIntVectorTy() &&
              PrevVec->getType()->isIntOrIntVectorTy() &&
              "Expected integer vector types only.");
-      assert(MinBWs.contains(TE->UserTreeIndices.front().UserTE) &&
-             "Expected user in MinBWs.");
-      bool IsSigned = MinBWs.lookup(TE->UserTreeIndices.front().UserTE).second;
+      std::optional<std::pair<unsigned long, bool>> Res;
+      if (const TreeEntry *BaseTE = getTreeEntry(TE->Scalars.front())) {
+        SmallVector<const TreeEntry *> BaseTEs;
+        if (BaseTE->isSame(TE->Scalars))
+          BaseTEs.push_back(BaseTE);
+        auto It = MultiNodeScalars.find(TE->Scalars.front());
+        if (It != MultiNodeScalars.end()) {
+          for (const TreeEntry *MNTE : It->getSecond())
+            if (MNTE->isSame(TE->Scalars))
+              BaseTEs.push_back(MNTE);
+        }
+        const auto *BaseIt = find_if(BaseTEs, [&](const TreeEntry *BaseTE) {
+          return MinBWs.contains(BaseTE);
+        });
+        if (BaseIt != BaseTEs.end())
+          Res = MinBWs.lookup(*BaseIt);
+      }
+      if (!Res) {
+        assert(MinBWs.contains(TE->UserTreeIndices.front().UserTE) &&
+               "Expected user in MinBWs.");
+        Res = MinBWs.lookup(TE->UserTreeIndices.front().UserTE);
+      }
+      assert(Res && "Expected user node or perfect diamond match in MinBWs.");
+      bool IsSigned = Res->second;
       Vec = Builder.CreateIntCast(Vec, PrevVec->getType(), IsSigned);
     }
     PrevVec->replaceAllUsesWith(Vec);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-reduced.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-reduced.ll
new file mode 100644
index 0000000..b03eb9e
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-reduced.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux < %s | FileCheck %s
+
+define i64 @test(ptr %p) {
+; CHECK-LABEL: define i64 @test(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[P]], i64 12
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i32> zeroinitializer, zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = xor <4 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = xor <4 x i32> [[TMP5]], zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i32> [[TMP7]], zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i32> [[TMP9]], zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], zeroinitializer
+; CHECK-NEXT:    [[TMP12:%.*]] = trunc <4 x i32> [[TMP11]] to <4 x i8>
+; CHECK-NEXT:    store <4 x i8> [[TMP12]], ptr [[TMP1]], align 1
+; CHECK-NEXT:    ret i64 0
+;
+  %1 = getelementptr i8, ptr %p, i64 13
+  %2 = getelementptr i8, ptr %p, i64 14
+  %3 = getelementptr i8, ptr %p, i64 15
+  %4 = getelementptr i8, ptr %p, i64 12
+  %5 = zext i8 0 to i32
+  %6 = and i32 %5, 0
+  %.not866 = icmp eq i32 %6, 0
+  %7 = select i1 %.not866, i32 0, i32 0
+  %8 = xor i32 0, %7
+  %9 = zext i8 0 to i32
+  %10 = and i32 %9, 0
+  %.not871 = icmp eq i32 %10, 0
+  %11 = select i1 %.not871, i32 0, i32 0
+  %12 = xor i32 0, %11
+  %13 = xor i32 %9, 0
+  %14 = xor i32 %13, 0
+  %15 = xor i32 %14, 0
+  %16 = xor i32 %15, 0
+  %17 = xor i32 %16, 0
+  %18 = xor i32 %17, %12
+  %19 = xor i32 %18, 0
+  %20 = xor i32 %19, 0
+  %21 = xor i32 %20, 0
+  %22 = xor i32 %21, 0
+  %23 = trunc i32 %22 to i8
+  store i8 %23, ptr %4, align 1
+  %24 = xor i32 %9, 0
+  %25 = xor i32 %24, 0
+  %26 = xor i32 %25, 0
+  %27 = xor i32 %26, 0
+  %28 = xor i32 %27, 0
+  %29 = xor i32 %28, %8
+  %30 = xor i32 %29, 0
+  %31 = xor i32 %30, 0
+  %32 = xor i32 %31, 0
+  %33 = xor i32 %32, 0
+  %34 = trunc i32 %33 to i8
+  store i8 %34, ptr %1, align 1
+  %35 = xor i32 0, %5
+  %36 = xor i32 %35, 0
+  %37 = xor i32 %36, 0
+  %38 = xor i32 %37, 0
+  %39 = xor i32 %38, 0
+  %40 = xor i32 %39, %8
+  %41 = xor i32 %40, 0
+  %42 = xor i32 %41, 0
+  %43 = xor i32 %42, 0
+  %44 = xor i32 %43, 0
+  %45 = trunc i32 %44 to i8
+  store i8 %45, ptr %2, align 1
+  %46 = xor i32 %35, 0
+  %47 = xor i32 %46, 0
+  %48 = xor i32 %47, 0
+  %49 = xor i32 %48, 0
+  %50 = xor i32 %49, %8
+  %51 = xor i32 %50, 0
+  %52 = xor i32 %51, 0
+  %53 = xor i32 %52, 0
+  %54 = xor i32 %53, 0
+  %55 = trunc i32 %54 to i8
+  store i8 %55, ptr %3, align 1
+  ret i64 0
+}
-- 
cgit v1.1


From c8e65e193d542464421ad4f9a9965d45b302ac0c Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein.wu@gmail.com>
Date: Fri, 19 Apr 2024 22:07:11 +0200
Subject: [clang] CTAD: Fix require-clause is not transformed. (#89378)

Fixes https://github.com/llvm/llvm-project/issues/89013

When building the deduction guide, we use the
TemplateArgsForBuildingFPrime to transform the require-clause from the
underlying class deduction guide. However, we do this at the wrong place
where not all elements of TemplateArgsForBuildingFPrime are initialized.
The fix involves rearranging the transformRequireClause call to the
correct location.

As part of the fix, we extend the TemplateInstantiator to support more
types in the template-rewrite mode. Otherwise, we will encounter an
assertion error when attempting to rewrite the template type parameter
type like D with a complex type like Derived<U>.
---
 clang/lib/Sema/SemaTemplate.cpp              | 27 ++++++++++++++-------------
 clang/lib/Sema/SemaTemplateInstantiate.cpp   |  5 +----
 clang/test/SemaCXX/cxx20-ctad-type-alias.cpp | 18 ++++++++++++++++++
 clang/test/SemaTemplate/deduction-guide.cpp  | 28 ++++++++++++++++++++++++++++
 4 files changed, 61 insertions(+), 17 deletions(-)

diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index d4976f9..4bda31b 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -2962,19 +2962,6 @@ void DeclareImplicitDeductionGuidesForTypeAlias(
           Context.getCanonicalTemplateArgument(
               Context.getInjectedTemplateArg(NewParam));
     }
-    // Substitute new template parameters into requires-clause if present.
-    Expr *RequiresClause =
-        transformRequireClause(SemaRef, F, TemplateArgsForBuildingFPrime);
-    // FIXME: implement the is_deducible constraint per C++
-    // [over.match.class.deduct]p3.3:
-    //    ... and a constraint that is satisfied if and only if the arguments
-    //    of A are deducible (see below) from the return type.
-    auto *FPrimeTemplateParamList = TemplateParameterList::Create(
-        Context, AliasTemplate->getTemplateParameters()->getTemplateLoc(),
-        AliasTemplate->getTemplateParameters()->getLAngleLoc(),
-        FPrimeTemplateParams,
-        AliasTemplate->getTemplateParameters()->getRAngleLoc(),
-        /*RequiresClause=*/RequiresClause);
 
     // To form a deduction guide f' from f, we leverage clang's instantiation
     // mechanism, we construct a template argument list where the template
@@ -3020,6 +3007,20 @@ void DeclareImplicitDeductionGuidesForTypeAlias(
             F, TemplateArgListForBuildingFPrime, AliasTemplate->getLocation(),
             Sema::CodeSynthesisContext::BuildingDeductionGuides)) {
       auto *GG = cast<CXXDeductionGuideDecl>(FPrime);
+      // Substitute new template parameters into requires-clause if present.
+      Expr *RequiresClause =
+          transformRequireClause(SemaRef, F, TemplateArgsForBuildingFPrime);
+      // FIXME: implement the is_deducible constraint per C++
+      // [over.match.class.deduct]p3.3:
+      //    ... and a constraint that is satisfied if and only if the arguments
+      //    of A are deducible (see below) from the return type.
+      auto *FPrimeTemplateParamList = TemplateParameterList::Create(
+          Context, AliasTemplate->getTemplateParameters()->getTemplateLoc(),
+          AliasTemplate->getTemplateParameters()->getLAngleLoc(),
+          FPrimeTemplateParams,
+          AliasTemplate->getTemplateParameters()->getRAngleLoc(),
+          /*RequiresClause=*/RequiresClause);
+
       buildDeductionGuide(SemaRef, AliasTemplate, FPrimeTemplateParamList,
                           GG->getCorrespondingConstructor(),
                           GG->getExplicitSpecifier(), GG->getTypeSourceInfo(),
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index 3e6676f..98d5c7c 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -2502,10 +2502,7 @@ TemplateInstantiator::TransformTemplateTypeParmType(TypeLocBuilder &TLB,
       assert(Arg.getKind() == TemplateArgument::Type &&
              "unexpected nontype template argument kind in template rewrite");
       QualType NewT = Arg.getAsType();
-      assert(isa<TemplateTypeParmType>(NewT) &&
-             "type parm not rewritten to type parm");
-      auto NewTL = TLB.push<TemplateTypeParmTypeLoc>(NewT);
-      NewTL.setNameLoc(TL.getNameLoc());
+      TLB.pushTrivial(SemaRef.Context, NewT, TL.getNameLoc());
       return NewT;
     }
 
diff --git a/clang/test/SemaCXX/cxx20-ctad-type-alias.cpp b/clang/test/SemaCXX/cxx20-ctad-type-alias.cpp
index 6f04264..508a3a5 100644
--- a/clang/test/SemaCXX/cxx20-ctad-type-alias.cpp
+++ b/clang/test/SemaCXX/cxx20-ctad-type-alias.cpp
@@ -289,3 +289,21 @@ using String = Array<char, N>;
 // Verify no crash on constructing the aggregate deduction guides.
 String s("hello");
 } // namespace test21
+
+// GH89013
+namespace test22 {
+class Base {};
+template <typename T>
+class Derived final : public Base {};
+
+template <typename T, typename D>
+requires __is_base_of(Base, D)
+struct Foo {
+  explicit Foo(D) {}
+};
+
+template <typename U>
+using AFoo = Foo<int, Derived<U>>;
+
+AFoo a(Derived<int>{});
+} // namespace test22
diff --git a/clang/test/SemaTemplate/deduction-guide.cpp b/clang/test/SemaTemplate/deduction-guide.cpp
index 58f08aa..29cc5a9 100644
--- a/clang/test/SemaTemplate/deduction-guide.cpp
+++ b/clang/test/SemaTemplate/deduction-guide.cpp
@@ -260,3 +260,31 @@ AG ag = {1};
 // CHECK:   |-TemplateArgument type 'int'
 // CHECK:   | `-BuiltinType {{.*}} 'int'
 // CHECK:   `-ParmVarDecl {{.*}} 'int'
+
+template <typename D>
+requires (sizeof(D) == 4)
+struct Foo {
+  Foo(D);
+};
+
+template <typename U>
+using AFoo = Foo<G<U>>;
+// Verify that the require-clause from the Foo deduction guide is transformed.
+// The D occurrence should be rewritten to G<type-parameter-0-0>.
+//
+// CHECK-LABEL: Dumping <deduction guide for AFoo>
+// CHECK: FunctionTemplateDecl {{.*}} implicit <deduction guide for AFoo>
+// CHECK-NEXT: |-TemplateTypeParmDecl {{.*}} typename depth 0 index 0 U
+// CHECK-NEXT: |-ParenExpr {{.*}} 'bool'
+// CHECK-NEXT: | `-BinaryOperator {{.*}} 'bool' '=='
+// CHECK-NEXT: |   |-UnaryExprOrTypeTraitExpr {{.*}} 'unsigned long' sizeof 'G<type-parameter-0-0>'
+// CHECK-NEXT: |   `-ImplicitCastExpr {{.*}} 'unsigned long' <IntegralCast>
+// CHECK-NEXT: |     `-IntegerLiteral {{.*}} 'int' 4
+// CHECK-NEXT: |-CXXDeductionGuideDecl {{.*}} implicit <deduction guide for AFoo> 'auto (G<type-parameter-0-0>) -> Foo<G<type-parameter-0-0>>'
+// CHECK-NEXT: | `-ParmVarDecl {{.*}} 'G<type-parameter-0-0>'
+// CHECK-NEXT: `-CXXDeductionGuideDecl {{.*}} implicit used <deduction guide for AFoo> 'auto (G<int>) -> Foo<G<int>>' implicit_instantiation
+// CHECK-NEXT:   |-TemplateArgument type 'int'
+// CHECK-NEXT:   | `-BuiltinType {{.*}} 'int'
+// CHECK-NEXT:   `-ParmVarDecl {{.*}} 'G<int>'
+
+AFoo aa(G<int>{});
-- 
cgit v1.1


From 08163cd9d82690e808c28515523b5fd0923d7b38 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <adrian-prantl@users.noreply.github.com>
Date: Fri, 19 Apr 2024 22:14:55 +0200
Subject: [lldb] Provide a better error message for missing symbols (#89433)

This adds a hint to the missing symbols error message to make it easier
to understand what this means to users.
---
 lldb/source/Expression/IRExecutionUnit.cpp                 | 4 +++-
 lldb/test/API/lang/cpp/constructors/TestCppConstructors.py | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/lldb/source/Expression/IRExecutionUnit.cpp b/lldb/source/Expression/IRExecutionUnit.cpp
index cb9bee8..07df8c5 100644
--- a/lldb/source/Expression/IRExecutionUnit.cpp
+++ b/lldb/source/Expression/IRExecutionUnit.cpp
@@ -431,7 +431,9 @@ void IRExecutionUnit::GetRunnableInfo(Status &error, lldb::addr_t &func_addr,
     }
 
     m_failed_lookups.clear();
-
+    ss.PutCString(
+        "\nHint: The expression tried to call a function that is not present "
+        "in the target, perhaps because it was optimized out by the compiler.");
     error.SetErrorString(ss.GetString());
 
     return;
diff --git a/lldb/test/API/lang/cpp/constructors/TestCppConstructors.py b/lldb/test/API/lang/cpp/constructors/TestCppConstructors.py
index 6724bfc..140877a 100644
--- a/lldb/test/API/lang/cpp/constructors/TestCppConstructors.py
+++ b/lldb/test/API/lang/cpp/constructors/TestCppConstructors.py
@@ -47,7 +47,7 @@ class TestCase(TestBase):
         self.expect(
             "expr ClassWithDeletedDefaultCtor().value",
             error=True,
-            substrs=["Couldn't look up symbols:"],
+            substrs=["Couldn't look up symbols:", "function missing"],
         )
 
     @skipIfWindows  # Can't find operator new.
-- 
cgit v1.1


From d634b233640dc38cf5f673a9cfcd1fe55124430a Mon Sep 17 00:00:00 2001
From: Samira Bazuzi <bazuzi@google.com>
Date: Fri, 19 Apr 2024 16:23:43 -0400
Subject: [clang][dataflow] Expose getReferencedDecls for a Stmt. (#89444)

---
 clang/include/clang/Analysis/FlowSensitive/ASTOps.h | 3 +++
 clang/lib/Analysis/FlowSensitive/ASTOps.cpp         | 6 ++++++
 2 files changed, 9 insertions(+)

diff --git a/clang/include/clang/Analysis/FlowSensitive/ASTOps.h b/clang/include/clang/Analysis/FlowSensitive/ASTOps.h
index f9fd3db..05748f3 100644
--- a/clang/include/clang/Analysis/FlowSensitive/ASTOps.h
+++ b/clang/include/clang/Analysis/FlowSensitive/ASTOps.h
@@ -96,6 +96,9 @@ struct ReferencedDecls {
 /// Returns declarations that are declared in or referenced from `FD`.
 ReferencedDecls getReferencedDecls(const FunctionDecl &FD);
 
+/// Returns declarations that are declared in or referenced from `S`.
+ReferencedDecls getReferencedDecls(const Stmt &S);
+
 } // namespace dataflow
 } // namespace clang
 
diff --git a/clang/lib/Analysis/FlowSensitive/ASTOps.cpp b/clang/lib/Analysis/FlowSensitive/ASTOps.cpp
index 6f179c1..619bf77 100644
--- a/clang/lib/Analysis/FlowSensitive/ASTOps.cpp
+++ b/clang/lib/Analysis/FlowSensitive/ASTOps.cpp
@@ -261,4 +261,10 @@ ReferencedDecls getReferencedDecls(const FunctionDecl &FD) {
   return Result;
 }
 
+ReferencedDecls getReferencedDecls(const Stmt &S) {
+  ReferencedDecls Result;
+  getReferencedDecls(S, Result);
+  return Result;
+}
+
 } // namespace clang::dataflow
-- 
cgit v1.1


From 5232cec8f947ed8bff4ca57f990954228d58e66d Mon Sep 17 00:00:00 2001
From: Nicolas van Kempen <nvankemp@gmail.com>
Date: Fri, 19 Apr 2024 17:00:13 -0400
Subject: Apply modernize-use-starts-ends-with on llvm-project (#89140)

Run `modernize-use-starts-ends-with` on llvm-project. Two instances are
flagged, minor readability improvements, extremely minor performance
improvements.

```
python3 clang-tools-extra/clang-tidy/tool/run-clang-tidy.py \
    -clang-tidy-binary="build/bin/clang-tidy" \
    -clang-apply-replacements-binary="build/bin/clang-apply-replacements" \
    -checks="-*,modernize-use-starts-ends-with" \
    -header-filter=".*" \
    -fix -format
```

I am working on some additions to this check, but they don't seem to
flag any additional cases anyway.
---
 clang-tools-extra/clang-doc/Representation.cpp   | 4 ++--
 lldb/unittests/Host/FileSystemTest.cpp           | 2 +-
 llvm/unittests/Support/VirtualFileSystemTest.cpp | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang-tools-extra/clang-doc/Representation.cpp b/clang-tools-extra/clang-doc/Representation.cpp
index 84233c3..2afff29 100644
--- a/clang-tools-extra/clang-doc/Representation.cpp
+++ b/clang-tools-extra/clang-doc/Representation.cpp
@@ -380,8 +380,8 @@ ClangDocContext::ClangDocContext(tooling::ExecutionContext *ECtx,
   this->SourceRoot = std::string(SourceRootDir);
   if (!RepositoryUrl.empty()) {
     this->RepositoryUrl = std::string(RepositoryUrl);
-    if (!RepositoryUrl.empty() && RepositoryUrl.find("http://") != 0 &&
-        RepositoryUrl.find("https://") != 0)
+    if (!RepositoryUrl.empty() && !RepositoryUrl.starts_with("http://") &&
+        !RepositoryUrl.starts_with("https://"))
       this->RepositoryUrl->insert(0, "https://");
   }
 }
diff --git a/lldb/unittests/Host/FileSystemTest.cpp b/lldb/unittests/Host/FileSystemTest.cpp
index 3b5ee7c..58887f6 100644
--- a/lldb/unittests/Host/FileSystemTest.cpp
+++ b/lldb/unittests/Host/FileSystemTest.cpp
@@ -93,7 +93,7 @@ public:
     std::map<std::string, vfs::Status>::iterator I;
     std::string Path;
     bool isInPath(StringRef S) {
-      if (Path.size() < S.size() && S.find(Path) == 0) {
+      if (Path.size() < S.size() && S.starts_with(Path)) {
         auto LastSep = S.find_last_of('/');
         if (LastSep == Path.size() || LastSep == Path.size() - 1)
           return true;
diff --git a/llvm/unittests/Support/VirtualFileSystemTest.cpp b/llvm/unittests/Support/VirtualFileSystemTest.cpp
index e9b4ac3..e9fd967 100644
--- a/llvm/unittests/Support/VirtualFileSystemTest.cpp
+++ b/llvm/unittests/Support/VirtualFileSystemTest.cpp
@@ -101,7 +101,7 @@ public:
     std::map<std::string, vfs::Status>::iterator I;
     std::string Path;
     bool isInPath(StringRef S) {
-      if (Path.size() < S.size() && S.find(Path) == 0) {
+      if (Path.size() < S.size() && S.starts_with(Path)) {
         auto LastSep = S.find_last_of('/');
         if (LastSep == Path.size() || LastSep == Path.size() - 1)
           return true;
-- 
cgit v1.1


From 45432eec0ae6a7f7452196eb099814d1a7dc2c0f Mon Sep 17 00:00:00 2001
From: Eli Friedman <efriedma@quicinc.com>
Date: Fri, 19 Apr 2024 14:21:03 -0700
Subject: [ARM64EC] Add softintrin.lib as an implicit dependency to object
 files. (#89171)

This copies MSVC behavior, and avoids weird link errors in certain
cases.
---
 clang/lib/Driver/ToolChains/Clang.cpp | 12 +++++++++---
 clang/test/Driver/cl-options.c        |  1 +
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 456ea74..97b4aa1 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4733,7 +4733,7 @@ renderDebugOptions(const ToolChain &TC, const Driver &D, const llvm::Triple &T,
                        Output.getFilename());
 }
 
-static void ProcessVSRuntimeLibrary(const ArgList &Args,
+static void ProcessVSRuntimeLibrary(const ToolChain &TC, const ArgList &Args,
                                     ArgStringList &CmdArgs) {
   unsigned RTOptionID = options::OPT__SLASH_MT;
 
@@ -4796,6 +4796,12 @@ static void ProcessVSRuntimeLibrary(const ArgList &Args,
     // implemented in clang.
     CmdArgs.push_back("--dependent-lib=oldnames");
   }
+
+  // All Arm64EC object files implicitly add softintrin.lib. This is necessary
+  // even if the file doesn't actually refer to any of the routines because
+  // the CRT itself has incomplete dependency markings.
+  if (TC.getTriple().isWindowsArm64EC())
+    CmdArgs.push_back("--dependent-lib=softintrin");
 }
 
 void Clang::ConstructJob(Compilation &C, const JobAction &JA,
@@ -7051,7 +7057,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
   if (Triple.isWindowsMSVCEnvironment() && !D.IsCLMode() &&
       Args.hasArg(options::OPT_fms_runtime_lib_EQ))
-    ProcessVSRuntimeLibrary(Args, CmdArgs);
+    ProcessVSRuntimeLibrary(getToolChain(), Args, CmdArgs);
 
   // Handle -fgcc-version, if present.
   VersionTuple GNUCVer;
@@ -8178,7 +8184,7 @@ void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType,
                            ArgStringList &CmdArgs) const {
   bool isNVPTX = getToolChain().getTriple().isNVPTX();
 
-  ProcessVSRuntimeLibrary(Args, CmdArgs);
+  ProcessVSRuntimeLibrary(getToolChain(), Args, CmdArgs);
 
   if (Arg *ShowIncludes =
           Args.getLastArg(options::OPT__SLASH_showIncludes,
diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c
index 5b6dfe3..7731300 100644
--- a/clang/test/Driver/cl-options.c
+++ b/clang/test/Driver/cl-options.c
@@ -790,6 +790,7 @@
 // RUN: %clang_cl -vctoolsdir "" /arm64EC /c -### -- %s 2>&1 | FileCheck --check-prefix=ARM64EC %s 
 // ARM64EC-NOT: /arm64EC has been overridden by specified target
 // ARM64EC: "-triple" "arm64ec-pc-windows-msvc19.33.0"
+// ARM64EC-SAME: "--dependent-lib=softintrin"
 
 // RUN: %clang_cl -vctoolsdir "" /arm64EC /c -target x86_64-pc-windows-msvc  -### -- %s 2>&1 | FileCheck --check-prefix=ARM64EC_OVERRIDE %s
 // ARM64EC_OVERRIDE: warning: /arm64EC has been overridden by specified target: x86_64-pc-windows-msvc; option ignored
-- 
cgit v1.1


From 5bcf31ebfad8b32aed20dd47be6238cc19710e63 Mon Sep 17 00:00:00 2001
From: Bill Wendling <5993918+bwendling@users.noreply.github.com>
Date: Fri, 19 Apr 2024 14:38:17 -0700
Subject: [Clang] Loop over FieldDecls instead of all Decls (#89453)

Only FieldDecls are of importance here. A struct defined within another
struct has the same semantics as if it were defined outside of the
struct. So there's no need to look into RecordDecls that aren't a field.
---
 clang/lib/CodeGen/CGBuiltin.cpp | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 4319501..4ab844d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -861,14 +861,13 @@ const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberField(
 static unsigned CountCountedByAttrs(const RecordDecl *RD) {
   unsigned Num = 0;
 
-  for (const Decl *D : RD->decls()) {
-    if (const auto *FD = dyn_cast<FieldDecl>(D);
-        FD && FD->getType()->isCountAttributedType()) {
+  for (const FieldDecl *FD : RD->fields()) {
+    if (FD->getType()->isCountAttributedType())
       return ++Num;
-    }
 
-    if (const auto *Rec = dyn_cast<RecordDecl>(D))
-      Num += CountCountedByAttrs(Rec);
+    QualType Ty = FD->getType();
+    if (Ty->isRecordType())
+      Num += CountCountedByAttrs(Ty->getAsRecordDecl());
   }
 
   return Num;
-- 
cgit v1.1


From 2a632d3d9f5c70db38c617b0816deb37ef722a7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?=
 =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?=
 =?UTF-8?q?=E3=83=B3=29?= <clementval@gmail.com>
Date: Fri, 19 Apr 2024 14:49:56 -0700
Subject: [flang][cuda] Use fir.cuda_deallocate for automatic deallocation
 (#89450)

Automatic deallocation of allocatable that are cuda device variable must
use the fir.cuda_deallocate operation. This patch update the automatic
deallocation code generation to use this operation when the variable is
a cuda variable.
---
 flang/include/flang/Lower/Allocatable.h    |  4 +++-
 flang/lib/Lower/Allocatable.cpp            | 12 +++++++-----
 flang/lib/Lower/ConvertVariable.cpp        |  5 +++--
 flang/test/Lower/CUDA/cuda-allocatable.cuf | 30 ++++++++++++++++++++++++++++++
 4 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/flang/include/flang/Lower/Allocatable.h b/flang/include/flang/Lower/Allocatable.h
index d3c16de..e8738f0 100644
--- a/flang/include/flang/Lower/Allocatable.h
+++ b/flang/include/flang/Lower/Allocatable.h
@@ -55,12 +55,14 @@ void genDeallocateStmt(AbstractConverter &converter,
 
 void genDeallocateBox(AbstractConverter &converter,
                       const fir::MutableBoxValue &box, mlir::Location loc,
+                      const Fortran::semantics::Symbol *sym = nullptr,
                       mlir::Value declaredTypeDesc = {});
 
 /// Deallocate an allocatable if it is allocated at the end of its lifetime.
 void genDeallocateIfAllocated(AbstractConverter &converter,
                               const fir::MutableBoxValue &box,
-                              mlir::Location loc);
+                              mlir::Location loc,
+                              const Fortran::semantics::Symbol *sym = nullptr);
 
 /// Create a MutableBoxValue for an allocatable or pointer entity.
 /// If the variables is a local variable that is not a dummy, it will be
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 38f6152..8e84ea2 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -859,18 +859,20 @@ genDeallocate(fir::FirOpBuilder &builder,
 void Fortran::lower::genDeallocateBox(
     Fortran::lower::AbstractConverter &converter,
     const fir::MutableBoxValue &box, mlir::Location loc,
-    mlir::Value declaredTypeDesc) {
+    const Fortran::semantics::Symbol *sym, mlir::Value declaredTypeDesc) {
   const Fortran::lower::SomeExpr *statExpr = nullptr;
   const Fortran::lower::SomeExpr *errMsgExpr = nullptr;
   ErrorManager errorManager;
   errorManager.init(converter, loc, statExpr, errMsgExpr);
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
-  genDeallocate(builder, converter, loc, box, errorManager, declaredTypeDesc);
+  genDeallocate(builder, converter, loc, box, errorManager, declaredTypeDesc,
+                sym);
 }
 
 void Fortran::lower::genDeallocateIfAllocated(
     Fortran::lower::AbstractConverter &converter,
-    const fir::MutableBoxValue &box, mlir::Location loc) {
+    const fir::MutableBoxValue &box, mlir::Location loc,
+    const Fortran::semantics::Symbol *sym) {
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
   mlir::Value isAllocated =
       fir::factory::genIsAllocatedOrAssociatedTest(builder, loc, box);
@@ -880,9 +882,9 @@ void Fortran::lower::genDeallocateIfAllocated(
             eleType.isa<fir::RecordType>() && box.isPolymorphic()) {
           mlir::Value declaredTypeDesc = builder.create<fir::TypeDescOp>(
               loc, mlir::TypeAttr::get(eleType));
-          genDeallocateBox(converter, box, loc, declaredTypeDesc);
+          genDeallocateBox(converter, box, loc, sym, declaredTypeDesc);
         } else {
-          genDeallocateBox(converter, box, loc);
+          genDeallocateBox(converter, box, loc, sym);
         }
       })
       .end();
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index 2d2d9eb..c40435c 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -916,13 +916,14 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter,
       break;
     case VariableCleanUp::Deallocate:
       auto *converterPtr = &converter;
-      converter.getFctCtx().attachCleanup([converterPtr, loc, exv]() {
+      auto *sym = &var.getSymbol();
+      converter.getFctCtx().attachCleanup([converterPtr, loc, exv, sym]() {
         const fir::MutableBoxValue *mutableBox =
             exv.getBoxOf<fir::MutableBoxValue>();
         assert(mutableBox &&
                "trying to deallocate entity not lowered as allocatable");
         Fortran::lower::genDeallocateIfAllocated(*converterPtr, *mutableBox,
-                                                 loc);
+                                                 loc, sym);
       });
     }
   }
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 5b10334..adbf172 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -17,6 +17,15 @@ end subroutine
 
 ! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
 
+! CHECK: %[[BOX_LOAD:.*]] = fir.load %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[BOX_LOAD]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
+! CHECK: %[[ADDR_I64:.*]] = fir.convert %[[ADDR]] : (!fir.heap<!fir.array<?xf32>>) -> i64
+! CHECK: %[[C0:.*]] = arith.constant 0 : i64
+! CHECK: %[[NE_C0:.*]] = arith.cmpi ne, %[[ADDR_I64]], %[[C0]] : i64
+! CHECK: fir.if %[[NE_C0]] {
+! CHECK:   %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
+! CHECK: }
+
 subroutine sub2()
   real, allocatable, managed :: a(:)
   integer :: istat
@@ -37,6 +46,10 @@ end subroutine
 ! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>, hasStat} -> i32
 ! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
 
+! CHECK: fir.if %{{.*}} {
+! CHECK:   %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>} -> i32
+! CHECK: }
+
 subroutine sub3()
   integer, allocatable, pinned :: a(:,:)
   logical :: plog
@@ -50,6 +63,9 @@ end subroutine
 ! CHECK: %[[PLOG_DECL:.*]]:2 = hlfir.declare %5 {uniq_name = "_QFsub3Eplog"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 ! CHECK-2: fir.call @_FortranAAllocatableSetBounds
 ! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> pinned(%[[PLOG_DECL]]#1 : !fir.ref<!fir.logical<4>>) {cuda_attr = #fir.cuda<pinned>} -> i32
+! CHECK: fir.if %{{.*}} {
+! CHECK:   %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> {cuda_attr = #fir.cuda<pinned>} -> i32
+! CHECK: }
 
 subroutine sub4()
   real, allocatable, unified :: a(:)
@@ -65,6 +81,9 @@ end subroutine
 ! CHECK: fir.call @_FortranAAllocatableSetBounds
 ! CHECK: %[[STREAM:.*]] = fir.load %[[ISTREAM_DECL]]#0 : !fir.ref<i32>
 ! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> stream(%[[STREAM]] : i32) {cuda_attr = #fir.cuda<unified>} -> i32
+! CHECK: fir.if %{{.*}} {
+! CHECK:   %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<unified>} -> i32
+! CHECK: }
 
 subroutine sub5()
   real, allocatable, device :: a(:)
@@ -80,6 +99,11 @@ end subroutine
 ! CHECK: %[[LOAD_B:.*]] = fir.load %[[BOX_B_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
 ! CHECK: fir.call @_FortranAAllocatableSetBounds
 ! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> source(%[[LOAD_B]] : !fir.box<!fir.heap<!fir.array<?xf32>>>) {cuda_attr = #fir.cuda<device>} -> i32
+! CHECK: fir.if
+! CHECK: fir.freemem
+! CHECK: fir.if %{{.*}} {
+! CHECK:   %{{.*}} = fir.cuda_deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
+! CHECK: }
 
 subroutine sub6()
   real, allocatable, device :: a(:)
@@ -95,6 +119,9 @@ end subroutine
 ! CHECK: %[[LOAD_B:.*]] = fir.load %[[BOX_B_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
 ! CHECK: fir.call @_FortranAAllocatableApplyMold
 ! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
+! CHECK: fir.if %{{.*}} {
+! CHECK:   %{{.*}} = fir.cuda_deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
+! CHECK: }
 
 subroutine sub7()
   real, allocatable, device :: a(:)
@@ -120,3 +147,6 @@ end subroutine
 ! CHECK: %[[ERR_BOX:.*]] = fir.embox %[[ERR_DECL]]#1 : (!fir.ref<!fir.char<1,50>>) -> !fir.box<!fir.char<1,50>>
 ! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%15 : !fir.box<!fir.char<1,50>>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
 ! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
+! CHECK: fir.if %{{.*}} {
+! CHECK:   %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
+! CHECK: }
-- 
cgit v1.1


From 7c3dfb29dc4b5345da6a7fb25f92bf8d2919bce9 Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Fri, 19 Apr 2024 16:52:04 -0500
Subject: [mlir][python] fix memref._is_constant_int_like (#89447)

---
 mlir/python/mlir/dialects/memref.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlir/python/mlir/dialects/memref.py b/mlir/python/mlir/dialects/memref.py
index a3d7834..bc9a3a5 100644
--- a/mlir/python/mlir/dialects/memref.py
+++ b/mlir/python/mlir/dialects/memref.py
@@ -8,12 +8,13 @@ from typing import Optional
 from ._memref_ops_gen import *
 from ._ods_common import _dispatch_mixed_values, MixedValues
 from .arith import ConstantOp, _is_integer_like_type
-from ..ir import Value, MemRefType, StridedLayoutAttr, ShapedType
+from ..ir import Value, MemRefType, StridedLayoutAttr, ShapedType, Operation
 
 
 def _is_constant_int_like(i):
     return (
         isinstance(i, Value)
+        and isinstance(i.owner, Operation)
         and isinstance(i.owner.opview, ConstantOp)
         and _is_integer_like_type(i.type)
     )
-- 
cgit v1.1


From 16e3464852efe3001060ff7feb3261dd397bfe84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?=
 =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?=
 =?UTF-8?q?=E3=83=B3=29?= <clementval@gmail.com>
Date: Fri, 19 Apr 2024 14:58:22 -0700
Subject: [flang][cuda] Enforce PINNED attribute when ALLOCATE with PINNED
 option (#89455)

When the PINNED option is specified on an ALLOCATE statement, the object
must have the PINNED attribute.
---
 flang/lib/Semantics/check-allocate.cpp | 7 +++++++
 flang/test/Semantics/cuf07.cuf         | 8 ++++++++
 2 files changed, 15 insertions(+)

diff --git a/flang/lib/Semantics/check-allocate.cpp b/flang/lib/Semantics/check-allocate.cpp
index a7244e1..7bfd862 100644
--- a/flang/lib/Semantics/check-allocate.cpp
+++ b/flang/lib/Semantics/check-allocate.cpp
@@ -611,6 +611,13 @@ bool AllocationCheckerHelper::RunChecks(SemanticsContext &context) {
       return false;
     }
   }
+  if (allocateInfo_.gotPinned) {
+    std::optional<common::CUDADataAttr> cudaAttr{GetCUDADataAttr(ultimate_)};
+    if (!cudaAttr || *cudaAttr != common::CUDADataAttr::Pinned) {
+      context.Say(name_.source,
+          "Object in ALLOCATE must have PINNED attribute when PINNED option is specified"_err_en_US);
+    }
+  }
   return RunCoarrayRelatedChecks(context);
 }
 
diff --git a/flang/test/Semantics/cuf07.cuf b/flang/test/Semantics/cuf07.cuf
index b520b5d..91a78e1 100644
--- a/flang/test/Semantics/cuf07.cuf
+++ b/flang/test/Semantics/cuf07.cuf
@@ -23,4 +23,12 @@ module m
     !BECAUSE: 'ma' is a host-associated allocatable and is not definable in a device subprogram
     deallocate(ma)
   end subroutine
+
+  subroutine hostsub()
+    integer, allocatable, device :: ia(:)
+    logical :: plog
+
+    !ERROR: Object in ALLOCATE must have PINNED attribute when PINNED option is specified
+    allocate(ia(100), pinned = plog)
+  end subroutine
 end module
-- 
cgit v1.1


From 4523a267829c807f3fc8fab8e5e9613985a51565 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?=
 =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?=
 =?UTF-8?q?=E3=83=B3=29?= <clementval@gmail.com>
Date: Fri, 19 Apr 2024 15:19:37 -0700
Subject: [flang][cuda] Enforce DEVICE attribute when ALLOCATE with STREAM
 option (#89459)

When the STREAM option is specified on an ALLOCATE statement, the object
must have the DEVICE attribute.
---
 flang/lib/Semantics/check-allocate.cpp     | 7 +++++++
 flang/test/Lower/CUDA/cuda-allocatable.cuf | 8 ++++----
 flang/test/Parser/cuf-sanity-common        | 2 +-
 flang/test/Parser/cuf-sanity-tree.CUF      | 2 --
 flang/test/Parser/cuf-sanity-unparse.CUF   | 2 +-
 flang/test/Semantics/cuf07.cuf             | 8 ++++++++
 6 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/flang/lib/Semantics/check-allocate.cpp b/flang/lib/Semantics/check-allocate.cpp
index 7bfd862..b4c5660 100644
--- a/flang/lib/Semantics/check-allocate.cpp
+++ b/flang/lib/Semantics/check-allocate.cpp
@@ -618,6 +618,13 @@ bool AllocationCheckerHelper::RunChecks(SemanticsContext &context) {
           "Object in ALLOCATE must have PINNED attribute when PINNED option is specified"_err_en_US);
     }
   }
+  if (allocateInfo_.gotStream) {
+    std::optional<common::CUDADataAttr> cudaAttr{GetCUDADataAttr(ultimate_)};
+    if (!cudaAttr || *cudaAttr != common::CUDADataAttr::Device) {
+      context.Say(name_.source,
+          "Object in ALLOCATE must have DEVICE attribute when STREAM option is specified"_err_en_US);
+    }
+  }
   return RunCoarrayRelatedChecks(context);
 }
 
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index adbf172..eff5f13 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -68,21 +68,21 @@ end subroutine
 ! CHECK: }
 
 subroutine sub4()
-  real, allocatable, unified :: a(:)
+  real, allocatable, device :: a(:)
   integer :: istream
   allocate(a(10), stream=istream)
 end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub4()
 ! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub4Ea"}
-! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<unified>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub4Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub4Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
 ! CHECK: %[[ISTREAM:.*]] = fir.alloca i32 {bindc_name = "istream", uniq_name = "_QFsub4Eistream"}
 ! CHECK: %[[ISTREAM_DECL:.*]]:2 = hlfir.declare %[[ISTREAM]] {uniq_name = "_QFsub4Eistream"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK: fir.call @_FortranAAllocatableSetBounds
 ! CHECK: %[[STREAM:.*]] = fir.load %[[ISTREAM_DECL]]#0 : !fir.ref<i32>
-! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> stream(%[[STREAM]] : i32) {cuda_attr = #fir.cuda<unified>} -> i32
+! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> stream(%[[STREAM]] : i32) {cuda_attr = #fir.cuda<device>} -> i32
 ! CHECK: fir.if %{{.*}} {
-! CHECK:   %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<unified>} -> i32
+! CHECK:   %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
 ! CHECK: }
 
 subroutine sub5()
diff --git a/flang/test/Parser/cuf-sanity-common b/flang/test/Parser/cuf-sanity-common
index 7f4217f..b097a6a 100644
--- a/flang/test/Parser/cuf-sanity-common
+++ b/flang/test/Parser/cuf-sanity-common
@@ -32,6 +32,6 @@ module m
     call globalsub<<<1, 2>>>
     call globalsub<<<1, 2, 3>>>
     call globalsub<<<1, 2, 3, 4>>>
-    allocate(pa(32), stream = 1, pinned = isPinned)
+    allocate(pa(32), pinned = isPinned)
   end subroutine
 end module
diff --git a/flang/test/Parser/cuf-sanity-tree.CUF b/flang/test/Parser/cuf-sanity-tree.CUF
index dc12759..2820441 100644
--- a/flang/test/Parser/cuf-sanity-tree.CUF
+++ b/flang/test/Parser/cuf-sanity-tree.CUF
@@ -199,8 +199,6 @@ include "cuf-sanity-common"
 !CHECK: | | | | | | AllocateShapeSpec
 !CHECK: | | | | | | | Scalar -> Integer -> Expr = '32_4'
 !CHECK: | | | | | | | | LiteralConstant -> IntLiteralConstant = '32'
-!CHECK: | | | | | AllocOpt -> Stream -> Scalar -> Integer -> Expr = '1_4'
-!CHECK: | | | | | | LiteralConstant -> IntLiteralConstant = '1'
 !CHECK: | | | | | AllocOpt -> Pinned -> Scalar -> Logical -> Variable = 'ispinned'
 !CHECK: | | | | | | Designator -> DataRef -> Name = 'ispinned'
 !CHECK: | | | EndSubroutineStmt -> 
diff --git a/flang/test/Parser/cuf-sanity-unparse.CUF b/flang/test/Parser/cuf-sanity-unparse.CUF
index 7ac3944..b6921e7 100644
--- a/flang/test/Parser/cuf-sanity-unparse.CUF
+++ b/flang/test/Parser/cuf-sanity-unparse.CUF
@@ -37,6 +37,6 @@ include "cuf-sanity-common"
 !CHECK:    CALL globalsub<<<1_4,2_4>>>()
 !CHECK:    CALL globalsub<<<1_4,2_4,3_4>>>()
 !CHECK:    CALL globalsub<<<1_4,2_4,3_4,4_4>>>()
-!CHECK:   ALLOCATE(pa(32_4), STREAM=1_4, PINNED=ispinned)
+!CHECK:   ALLOCATE(pa(32_4), PINNED=ispinned)
 !CHECK:  END SUBROUTINE
 !CHECK: END MODULE
diff --git a/flang/test/Semantics/cuf07.cuf b/flang/test/Semantics/cuf07.cuf
index 91a78e1..c48abb5 100644
--- a/flang/test/Semantics/cuf07.cuf
+++ b/flang/test/Semantics/cuf07.cuf
@@ -31,4 +31,12 @@ module m
     !ERROR: Object in ALLOCATE must have PINNED attribute when PINNED option is specified
     allocate(ia(100), pinned = plog)
   end subroutine
+
+  subroutine host2()
+    integer, allocatable, pinned :: ia(:)
+    integer :: istream
+
+    !ERROR: Object in ALLOCATE must have DEVICE attribute when STREAM option is specified
+    allocate(ia(100), stream = istream)
+  end subroutine
 end module
-- 
cgit v1.1


From d9169ffaf7c01691644537d3443240748b107359 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Fri, 19 Apr 2024 15:29:14 -0700
Subject: [BOLT,test] Update AArch64/constant_island_pie_update.s after
 llvm-readelf -r RELR change

---
 bolt/test/AArch64/constant_island_pie_update.s | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/bolt/test/AArch64/constant_island_pie_update.s b/bolt/test/AArch64/constant_island_pie_update.s
index 0ab67d0..313e103 100644
--- a/bolt/test/AArch64/constant_island_pie_update.s
+++ b/bolt/test/AArch64/constant_island_pie_update.s
@@ -18,7 +18,7 @@
 # RUN: llvm-objdump -j .text -d --show-all-symbols %t.relr.bolt | FileCheck %s
 # RUN: llvm-objdump -j .text -d %t.relr.bolt | \
 # RUN:   FileCheck %s --check-prefix=ADDENDCHECK
-# RUN: llvm-readelf -rsW %t.relr.bolt | FileCheck --check-prefix=ELFCHECK %s
+# RUN: llvm-readelf -rsW %t.relr.bolt | FileCheck --check-prefix=RELRELFCHECK %s
 # RUN: llvm-readelf -SW %t.relr.bolt | FileCheck --check-prefix=RELRSZCHECK %s
 
 // Check that the CI value was updated
@@ -51,6 +51,12 @@
 # ELFCHECK-NEXT: {{.*}} R_AARCH64_RELATIVE
 # ELFCHECK: {{.*}}[[#OFF]] {{.*}} $d
 
+# RELRELFCHECK:       $d{{$}}
+# RELRELFCHECK-NEXT:  $d + 0x8{{$}}
+# RELRELFCHECK-NEXT:  $d + 0x18{{$}}
+# RELRELFCHECK-NEXT:  mytextP
+# RELRELFCHECK-EMPTY:
+
 // Check that .relr.dyn size is 2 bytes to ensure that last 3 relocations were
 // encoded as a bitmap so the total section size for 3 relocations is 2 bytes.
 # RELRSZCHECK: .relr.dyn RELR [[#%x,ADDR:]] [[#%x,OFF:]] {{0*}}10
-- 
cgit v1.1