From 507b37248d2b63fb645bb7052908818b39fe8a6f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 7 Jun 2024 16:03:05 -0700 Subject: [RISCV] Remove unused tablegen multiclass. NFC --- llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 603c198..8a0308f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -2369,12 +2369,6 @@ multiclass VPseudoBinaryW_VI { "@earlyclobber $rd", TargetConstraintType=3>; } -multiclass VPseudoBinaryW_VF { - defm "_V" # f.FX : VPseudoBinary; -} - multiclass VPseudoBinaryW_VF_RM { defm "_V" # f.FX : VPseudoBinaryRoundingMode { defm "_WX" : VPseudoBinary; } -multiclass VPseudoBinaryW_WF { - defm "_W" # f.FX : VPseudoBinary; -} - multiclass VPseudoBinaryW_WF_RM { defm "_W" # f.FX : VPseudoBinaryRoundingMode Date: Fri, 7 Jun 2024 16:31:06 -0700 Subject: [RISCV] Remove CarryIn and Constraint parameters from VPseudoTiedBinaryCarryIn. NFC They were always passed the same values, 1 for CarryIn and "" for Constraint. --- llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 33 +++++++++---------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 8a0308f..72e8ae7 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -1549,20 +1549,15 @@ class VPseudoTiedBinaryCarryIn : Pseudo<(outs RetClass:$rd), - !if(CarryIn, - (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, - VMV0:$carry, AVL:$vl, ixlenimm:$sew), - (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, - AVL:$vl, ixlenimm:$sew)), []>, + (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, + VMV0:$carry, AVL:$vl, ixlenimm:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let Constraints = "$rd = $merge"; let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; @@ -2465,13 +2460,11 @@ multiclass VPseudoBinaryV_VM; } -multiclass VPseudoTiedBinaryV_VM { +multiclass VPseudoTiedBinaryV_VM { let isCommutable = Commutable in def "_VVM" # "_" # m.MX: VPseudoTiedBinaryCarryIn.R, - m.vrclass, m.vrclass, m, 1, "", - TargetConstraintType>; + m.vrclass, m.vrclass, m>; } multiclass VPseudoBinaryV_XM; } -multiclass VPseudoTiedBinaryV_XM { +multiclass VPseudoTiedBinaryV_XM { def "_VXM" # "_" # m.MX: VPseudoTiedBinaryCarryIn.R, - m.vrclass, GPR, m, 1, "", - TargetConstraintType>; + m.vrclass, GPR, m>; } multiclass VPseudoVMRG_FM { @@ -2496,8 +2488,7 @@ multiclass VPseudoVMRG_FM { defvar mx = m.MX; def "_V" # f.FX # "M_" # mx : VPseudoTiedBinaryCarryIn.R, m.vrclass, - f.fprclass, m, CarryIn=1, - Constraint = "">, + f.fprclass, m>, SchedBinary<"WriteVFMergeV", "ReadVFMergeV", "ReadVFMergeF", mx, forceMasked=1, forceMergeOpRead=true>; } @@ -2516,7 +2507,7 @@ multiclass VPseudoBinaryV_IM { def "_VIM" # "_" # m.MX: VPseudoTiedBinaryCarryIn.R, - m.vrclass, simm5, m, 1, "">; + m.vrclass, simm5, m>; } multiclass VPseudoUnaryVMV_V_X_I { @@ -3073,17 +3064,17 @@ multiclass VPseudoVMRG_VM_XM_IM { defvar mx = m.MX; def "_VVM" # "_" # m.MX: VPseudoTiedBinaryCarryIn.R, - m.vrclass, m.vrclass, m, 1, "">, + m.vrclass, m.vrclass, m>, SchedBinary<"WriteVIMergeV", "ReadVIMergeV", "ReadVIMergeV", mx, 
                                     forceMergeOpRead=true>;
     def "_VXM" # "_" # m.MX:
       VPseudoTiedBinaryCarryIn.R,
-                               m.vrclass, GPR, m, 1, "">,
+                               m.vrclass, GPR, m>,
       SchedBinary<"WriteVIMergeX", "ReadVIMergeV", "ReadVIMergeX", mx,
                   forceMergeOpRead=true>;
     def "_VIM" # "_" # m.MX:
       VPseudoTiedBinaryCarryIn.R,
-                               m.vrclass, simm5, m, 1, "">,
+                               m.vrclass, simm5, m>,
       SchedUnary<"WriteVIMergeI", "ReadVIMergeV", mx,
                  forceMergeOpRead=true>;
   }
--
cgit v1.1

From c8eff8788f1052398176f3aca5aebbd12368920a Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 7 Jun 2024 16:47:06 -0700
Subject: [RISCV] Rename VPseudoBinaryCarryIn to VPseudoBinaryCarry. NFC

It doesn't always have a carry-in, even though one of its parameters is
named CarryIn. It always has a CarryOut or a CarryIn, and in some cases
both.
---
 llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 38 ++++++++++++-------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 72e8ae7..818073d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -1521,13 +1521,13 @@ class VPseudoTiedBinaryMaskRoundingMode :
+class VPseudoBinaryCarry :
   Pseudo<(outs RetClass:$rd),
          !if(CarryIn,
              (ins Op1Class:$rs2, Op2Class:$rs1,
@@ -2454,10 +2454,10 @@ multiclass VPseudoBinaryV_VM {
   let isCommutable = Commutable in
   def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX :
-    VPseudoBinaryCarryIn.R, m.vrclass)),
-                           m.vrclass, m.vrclass, m, CarryIn, Constraint, TargetConstraintType>;
+    VPseudoBinaryCarry.R, m.vrclass)),
+                         m.vrclass, m.vrclass, m, CarryIn, Constraint, TargetConstraintType>;
 }

 multiclass VPseudoTiedBinaryV_VM {
@@ -2470,10 +2470,10 @@ multiclass VPseudoTiedBinaryV_VM {
 multiclass VPseudoBinaryV_XM {
   def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX :
-    VPseudoBinaryCarryIn.R, m.vrclass)),
-                           m.vrclass, GPR, m, CarryIn, Constraint, TargetConstraintType>;
+    VPseudoBinaryCarry.R, m.vrclass)),
+                         m.vrclass, GPR, m, CarryIn, Constraint, TargetConstraintType>;
 }

 multiclass VPseudoTiedBinaryV_XM {
@@ -2498,10 +2498,10 @@ multiclass VPseudoBinaryV_IM {
   def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX :
-    VPseudoBinaryCarryIn.R, m.vrclass)),
-                           m.vrclass, simm5, m, CarryIn, Constraint, TargetConstraintType>;
+    VPseudoBinaryCarry.R, m.vrclass)),
+                         m.vrclass, simm5, m, CarryIn, Constraint, TargetConstraintType>;
 }

 multiclass VPseudoTiedBinaryV_IM {
--
cgit v1.1

From 435dd9746107e13c2ad019be3bd34815f7d2360d Mon Sep 17 00:00:00 2001
From: jimingham
Date: Fri, 7 Jun 2024 17:05:29 -0700
Subject: Add AllowRepeats to SBCommandInterpreterRunOptions. (#94786)

This is useful if you have a transcript of a user session and want to
rerun those commands with RunCommandInterpreter. The same functionality
is also useful in testing. I'm adding it primarily for the second
reason. In a subsequent patch, I'm adding the ability for Python-based
commands to provide their "auto-repeat" command. Among other things,
that will allow potentially state-destroying user commands to prevent
auto-repeat. Testing this with Shell or pexpect tests is not nearly as
accurate or convenient as using RunCommandInterpreter, but to use that
I need to allow auto-repeat. I think for consistency's sake, having
interactive sessions always do auto-repeats is the right choice, though
that's a lightly held opinion...
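As a minimal sketch of the intended use (the transcript file name here is
made up; the RunCommandInterpreter call mirrors the test added below):

    import lldb

    # Replay a recorded transcript, letting blank lines auto-repeat the
    # previous command ("session.txt" is a hypothetical file name).
    dbg = lldb.SBDebugger.Create()
    opts = lldb.SBCommandInterpreterRunOptions()
    opts.SetAllowRepeats(True)
    dbg.SetInputFile(open("session.txt", "r"))
    n_errors, quit_requested, has_crashed = dbg.RunCommandInterpreter(
        True, False, opts, 0, False, False
    )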
---
 .../SBCommandInterpreterRunOptionsDocstrings.i     |  3 ++
 .../lldb/API/SBCommandInterpreterRunOptions.h      |  8 +++
 lldb/include/lldb/Interpreter/CommandInterpreter.h | 16 +++++-
 lldb/source/API/SBCommandInterpreterRunOptions.cpp | 12 +++++
 lldb/source/Interpreter/CommandInterpreter.cpp     | 14 +++--
 .../interpreter/TestRunCommandInterpreterAPI.py    | 59 ++++++++++++++++++----
 6 files changed, 98 insertions(+), 14 deletions(-)

diff --git a/lldb/bindings/interface/SBCommandInterpreterRunOptionsDocstrings.i b/lldb/bindings/interface/SBCommandInterpreterRunOptionsDocstrings.i
index b37da05..a4398d9 100644
--- a/lldb/bindings/interface/SBCommandInterpreterRunOptionsDocstrings.i
+++ b/lldb/bindings/interface/SBCommandInterpreterRunOptionsDocstrings.i
@@ -10,5 +10,8 @@ A default SBCommandInterpreterRunOptions object has:
 * PrintResults: true
 * PrintErrors: true
 * AddToHistory: true
+* AllowRepeats: false
+Interactive debug sessions always allow repeats; the AllowRepeats
+run option only affects non-interactive sessions.
 ") lldb::SBCommandInterpreterRunOptions;

diff --git a/lldb/include/lldb/API/SBCommandInterpreterRunOptions.h b/lldb/include/lldb/API/SBCommandInterpreterRunOptions.h
index 69b96926..0f248c9 100644
--- a/lldb/include/lldb/API/SBCommandInterpreterRunOptions.h
+++ b/lldb/include/lldb/API/SBCommandInterpreterRunOptions.h
@@ -72,6 +72,14 @@ public:
   void SetSpawnThread(bool);

+  bool GetAllowRepeats() const;
+
+  /// By default, RunCommandInterpreter will discard repeats if the
+  /// IOHandler being used is not interactive. Setting AllowRepeats to true
+  /// will override this behavior and always process empty lines in the input
+  /// as a repeat command.
+  void SetAllowRepeats(bool);
+
 private:
   lldb_private::CommandInterpreterRunOptions *get() const;

diff --git a/lldb/include/lldb/Interpreter/CommandInterpreter.h b/lldb/include/lldb/Interpreter/CommandInterpreter.h
index 8863523..48f6618 100644
--- a/lldb/include/lldb/Interpreter/CommandInterpreter.h
+++ b/lldb/include/lldb/Interpreter/CommandInterpreter.h
@@ -93,15 +93,20 @@ public:
   /// \param[in] add_to_history
   ///    If \b true add the commands to the command history. If \b false, don't
   ///    add them.
+  /// \param[in] handle_repeats
+  ///    If \b true then treat empty lines as repeat commands even if the
+  ///    interpreter is non-interactive.
   CommandInterpreterRunOptions(LazyBool stop_on_continue,
                                LazyBool stop_on_error, LazyBool stop_on_crash,
                                LazyBool echo_commands, LazyBool echo_comments,
                                LazyBool print_results, LazyBool print_errors,
-                               LazyBool add_to_history)
+                               LazyBool add_to_history,
+                               LazyBool handle_repeats)
       : m_stop_on_continue(stop_on_continue), m_stop_on_error(stop_on_error),
         m_stop_on_crash(stop_on_crash), m_echo_commands(echo_commands),
         m_echo_comment_commands(echo_comments), m_print_results(print_results),
-        m_print_errors(print_errors), m_add_to_history(add_to_history) {}
+        m_print_errors(print_errors), m_add_to_history(add_to_history),
+        m_allow_repeats(handle_repeats) {}

   CommandInterpreterRunOptions() = default;

@@ -183,6 +188,12 @@ public:
     m_spawn_thread = spawn_thread ? eLazyBoolYes : eLazyBoolNo;
   }

+  bool GetAllowRepeats() const { return DefaultToNo(m_allow_repeats); }
+
+  void SetAllowRepeats(bool allow_repeats) {
+    m_allow_repeats = allow_repeats ?
eLazyBoolYes : eLazyBoolNo; + } + LazyBool m_stop_on_continue = eLazyBoolCalculate; LazyBool m_stop_on_error = eLazyBoolCalculate; LazyBool m_stop_on_crash = eLazyBoolCalculate; @@ -193,6 +204,7 @@ public: LazyBool m_add_to_history = eLazyBoolCalculate; LazyBool m_auto_handle_events; LazyBool m_spawn_thread; + LazyBool m_allow_repeats = eLazyBoolCalculate; private: static bool DefaultToYes(LazyBool flag) { diff --git a/lldb/source/API/SBCommandInterpreterRunOptions.cpp b/lldb/source/API/SBCommandInterpreterRunOptions.cpp index 6c6b2aa..0c7581d 100644 --- a/lldb/source/API/SBCommandInterpreterRunOptions.cpp +++ b/lldb/source/API/SBCommandInterpreterRunOptions.cpp @@ -164,6 +164,18 @@ void SBCommandInterpreterRunOptions::SetSpawnThread(bool spawn_thread) { m_opaque_up->SetSpawnThread(spawn_thread); } +bool SBCommandInterpreterRunOptions::GetAllowRepeats() const { + LLDB_INSTRUMENT_VA(this); + + return m_opaque_up->GetAllowRepeats(); +} + +void SBCommandInterpreterRunOptions::SetAllowRepeats(bool allow_repeats) { + LLDB_INSTRUMENT_VA(this, allow_repeats); + + m_opaque_up->SetAllowRepeats(allow_repeats); +} + lldb_private::CommandInterpreterRunOptions * SBCommandInterpreterRunOptions::get() const { return m_opaque_up.get(); diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index acd6294..da995de 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -2707,7 +2707,8 @@ enum { eHandleCommandFlagEchoCommentCommand = (1u << 3), eHandleCommandFlagPrintResult = (1u << 4), eHandleCommandFlagPrintErrors = (1u << 5), - eHandleCommandFlagStopOnCrash = (1u << 6) + eHandleCommandFlagStopOnCrash = (1u << 6), + eHandleCommandFlagAllowRepeats = (1u << 7) }; void CommandInterpreter::HandleCommandsFromFile( @@ -3129,14 +3130,19 @@ void CommandInterpreter::IOHandlerInputComplete(IOHandler &io_handler, return; const bool is_interactive = io_handler.GetIsInteractive(); - if (!is_interactive) { + const bool allow_repeats = + io_handler.GetFlags().Test(eHandleCommandFlagAllowRepeats); + + if (!is_interactive && !allow_repeats) { // When we are not interactive, don't execute blank lines. This will happen // sourcing a commands file. We don't want blank lines to repeat the // previous command and cause any errors to occur (like redefining an // alias, get an error and stop parsing the commands file). + // But obey the AllowRepeats flag if the user has set it. if (line.empty()) return; - + } + if (!is_interactive) { // When using a non-interactive file handle (like when sourcing commands // from a file) we need to echo the command out so we don't just see the // command output and no command... 
@@ -3388,6 +3394,8 @@ CommandInterpreter::GetIOHandler(bool force_create, flags |= eHandleCommandFlagPrintResult; if (options->m_print_errors != eLazyBoolNo) flags |= eHandleCommandFlagPrintErrors; + if (options->m_allow_repeats == eLazyBoolYes) + flags |= eHandleCommandFlagAllowRepeats; } else { flags = eHandleCommandFlagEchoCommand | eHandleCommandFlagPrintResult | eHandleCommandFlagPrintErrors; diff --git a/lldb/test/API/python_api/interpreter/TestRunCommandInterpreterAPI.py b/lldb/test/API/python_api/interpreter/TestRunCommandInterpreterAPI.py index af97493..f677b86 100644 --- a/lldb/test/API/python_api/interpreter/TestRunCommandInterpreterAPI.py +++ b/lldb/test/API/python_api/interpreter/TestRunCommandInterpreterAPI.py @@ -47,28 +47,66 @@ class CommandRunInterpreterAPICase(TestBase): TestBase.setUp(self) self.stdin_path = self.getBuildArtifact("stdin.txt") + self.stdout_path = self.getBuildArtifact("stdout.txt") + + def run_commands_string( + self, command_string, options=lldb.SBCommandInterpreterRunOptions() + ): + """Run the commands in command_string through RunCommandInterpreter. + Returns (n_errors, quit_requested, has_crashed, result_string).""" with open(self.stdin_path, "w") as input_handle: - input_handle.write("nonexistingcommand\nquit") + input_handle.write(command_string) - self.dbg.SetInputFile(open(self.stdin_path, "r")) + n_errors = 0 + quit_requested = False + has_crashed = False - # No need to track the output - devnull = open(os.devnull, "w") - self.dbg.SetOutputFile(devnull) - self.dbg.SetErrorFile(devnull) + with open(self.stdin_path, "r") as in_fileH, open( + self.stdout_path, "w" + ) as out_fileH: + self.dbg.SetInputFile(in_fileH) + + self.dbg.SetOutputFile(out_fileH) + self.dbg.SetErrorFile(out_fileH) + + n_errors, quit_requested, has_crashed = self.dbg.RunCommandInterpreter( + True, False, options, 0, False, False + ) + + result_string = None + with open(self.stdout_path, "r") as out_fileH: + result_string = out_fileH.read() + + return (n_errors, quit_requested, has_crashed, result_string) def test_run_session_with_error_and_quit(self): """Run non-existing and quit command returns appropriate values""" - n_errors, quit_requested, has_crashed = self.dbg.RunCommandInterpreter( - True, False, lldb.SBCommandInterpreterRunOptions(), 0, False, False + n_errors, quit_requested, has_crashed, _ = self.run_commands_string( + "nonexistingcommand\nquit\n" ) - self.assertGreater(n_errors, 0) self.assertTrue(quit_requested) self.assertFalse(has_crashed) + def test_allow_repeat(self): + """Try auto-repeat of process launch - the command will fail and + the auto-repeat will fail because of no auto-repeat.""" + options = lldb.SBCommandInterpreterRunOptions() + options.SetEchoCommands(False) + options.SetAllowRepeats(True) + + n_errors, quit_requested, has_crashed, result_str = self.run_commands_string( + "process launch\n\n", options + ) + self.assertEqual(n_errors, 2) + self.assertFalse(quit_requested) + self.assertFalse(has_crashed) + + self.assertIn("invalid target", result_str) + self.assertIn("No auto repeat", result_str) + class SBCommandInterpreterRunOptionsCase(TestBase): NO_DEBUG_INFO_TESTCASE = True @@ -86,6 +124,7 @@ class SBCommandInterpreterRunOptionsCase(TestBase): self.assertTrue(opts.GetPrintResults()) self.assertTrue(opts.GetPrintErrors()) self.assertTrue(opts.GetAddToHistory()) + self.assertFalse(opts.GetAllowRepeats()) # Invert values opts.SetStopOnContinue(not opts.GetStopOnContinue()) @@ -95,6 +134,7 @@ class SBCommandInterpreterRunOptionsCase(TestBase): 
opts.SetPrintResults(not opts.GetPrintResults()) opts.SetPrintErrors(not opts.GetPrintErrors()) opts.SetAddToHistory(not opts.GetAddToHistory()) + opts.SetAllowRepeats(not opts.GetAllowRepeats()) # Check the value changed self.assertTrue(opts.GetStopOnContinue()) @@ -104,3 +144,4 @@ class SBCommandInterpreterRunOptionsCase(TestBase): self.assertFalse(opts.GetPrintResults()) self.assertFalse(opts.GetPrintErrors()) self.assertFalse(opts.GetAddToHistory()) + self.assertTrue(opts.GetAllowRepeats()) -- cgit v1.1 From dc3f8c2f587e9647d1ce86426c2cf317c98f453c Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 7 Jun 2024 17:25:57 -0700 Subject: [memprof] Improve deserialization performance in V3 (#94787) We call llvm::sort in a couple of places in the V3 encoding: - We sort Frames by FrameIds for stability of the output. - We sort call stacks in the dictionary order to maximize the length of the common prefix between adjacent call stacks. It turns out that we can improve the deserialization performance by modifying the comparison functions -- without changing the format at all. Both places take advantage of the histogram of Frames -- how many times each Frame occurs in the call stacks. - Frames: We serialize popular Frames in the descending order of popularity for improved cache locality. For two equally popular Frames, we break a tie by serializing one that tends to appear earlier in call stacks. Here, "earlier" means a smaller index within llvm::SmallVector. - Call Stacks: We sort the call stacks to reduce the number of times we follow pointers to parents during deserialization. Specifically, instead of comparing two call stacks in the strcmp style -- integer comparisons of FrameIds, we compare two FrameIds F1 and F2 with Histogram[F1] < Histogram[F2] at respective indexes. Since we encode from the end of the sorted list of call stacks, we tend to encode popular call stacks first. Since the two places use the same histogram, we compute it once and share it in the two places. Sorting the call stacks reduces the number of "jumps" by 74% when we deserialize all MemProfRecords. The cycle and instruction counts go down by 10% and 1.5%, respectively. If we sort the Frames in addition to the call stacks, then the cycle and instruction counts go down by 14% and 1.6%, respectively, relative to the same baseline (that is, without this patch). --- llvm/include/llvm/ProfileData/MemProf.h | 20 +++++++-- llvm/lib/ProfileData/InstrProfWriter.cpp | 43 +++++++++++++++---- llvm/lib/ProfileData/MemProf.cpp | 67 ++++++++++++++++++++++++++++-- llvm/unittests/ProfileData/MemProfTest.cpp | 24 +++++++++-- 4 files changed, 136 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 0e4bb9c..8f5ba9c 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -932,6 +932,18 @@ struct IndexedMemProfData { llvm::MapVector> CallStackData; }; +struct FrameStat { + // The number of occurrences of a given FrameId. + uint64_t Count = 0; + // The sum of indexes where a given FrameId shows up. + uint64_t PositionSum = 0; +}; + +// Compute a histogram of Frames in call stacks. +llvm::DenseMap +computeFrameHistogram(llvm::MapVector> + &MemProfCallStackData); + // Construct a radix tree of call stacks. // // A set of call stacks might look like: @@ -1027,9 +1039,11 @@ public: CallStackRadixTreeBuilder() = default; // Build a radix tree array. 
- void build(llvm::MapVector> - &&MemProfCallStackData, - const llvm::DenseMap &MemProfFrameIndexes); + void + build(llvm::MapVector> + &&MemProfCallStackData, + const llvm::DenseMap &MemProfFrameIndexes, + llvm::DenseMap &FrameHistogram); const std::vector &getRadixArray() const { return RadixArray; } diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 7d7c980..1a9add1 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -494,17 +494,40 @@ static uint64_t writeMemProfFrames( static llvm::DenseMap writeMemProfFrameArray( ProfOStream &OS, - llvm::MapVector &MemProfFrameData) { + llvm::MapVector &MemProfFrameData, + llvm::DenseMap &FrameHistogram) { // Mappings from FrameIds to array indexes. llvm::DenseMap MemProfFrameIndexes; - // Sort the FrameIDs for stability. + // Compute the order in which we serialize Frames. The order does not matter + // in terms of correctness, but we still compute it for deserialization + // performance. Specifically, if we serialize frequently used Frames one + // after another, we have better cache utilization. For two Frames that + // appear equally frequently, we break a tie by serializing the one that tends + // to appear earlier in call stacks. We implement the tie-breaking mechanism + // by computing the sum of indexes within call stacks for each Frame. If we + // still have a tie, then we just resort to compare two FrameIds, which is + // just for stability of output. std::vector> FrameIdOrder; FrameIdOrder.reserve(MemProfFrameData.size()); for (const auto &[Id, Frame] : MemProfFrameData) FrameIdOrder.emplace_back(Id, &Frame); assert(MemProfFrameData.size() == FrameIdOrder.size()); - llvm::sort(FrameIdOrder); + llvm::sort(FrameIdOrder, + [&](const std::pair &L, + const std::pair &R) { + const auto &SL = FrameHistogram[L.first]; + const auto &SR = FrameHistogram[R.first]; + // Popular FrameIds should come first. + if (SL.Count != SR.Count) + return SL.Count > SR.Count; + // If they are equally popular, then the one that tends to appear + // earlier in call stacks should come first. + if (SL.PositionSum != SR.PositionSum) + return SL.PositionSum < SR.PositionSum; + // Compare their FrameIds for sort stability. + return L.first < R.first; + }); // Serialize all frames while creating mappings from linear IDs to FrameIds. 
   uint64_t Index = 0;
@@ -543,12 +566,14 @@ writeMemProfCallStackArray(
     llvm::MapVector> &MemProfCallStackData,
     llvm::DenseMap
-        &MemProfFrameIndexes) {
+        &MemProfFrameIndexes,
+    llvm::DenseMap &FrameHistogram) {
   llvm::DenseMap MemProfCallStackIndexes;

   memprof::CallStackRadixTreeBuilder Builder;
-  Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes);
+  Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
+                FrameHistogram);
   for (auto I : Builder.getRadixArray())
     OS.write32(I);
   MemProfCallStackIndexes = Builder.takeCallStackPos();
@@ -704,13 +729,17 @@ static Error writeMemProfV3(ProfOStream &OS,
     Schema = memprof::getFullSchema();
   writeMemProfSchema(OS, Schema);

+  llvm::DenseMap FrameHistogram =
+      memprof::computeFrameHistogram(MemProfData.CallStackData);
+  assert(MemProfData.FrameData.size() == FrameHistogram.size());
+
   llvm::DenseMap MemProfFrameIndexes =
-      writeMemProfFrameArray(OS, MemProfData.FrameData);
+      writeMemProfFrameArray(OS, MemProfData.FrameData, FrameHistogram);

   uint64_t CallStackPayloadOffset = OS.tell();
   llvm::DenseMap MemProfCallStackIndexes =
       writeMemProfCallStackArray(
-          OS, MemProfData.CallStackData, MemProfFrameIndexes);
+          OS, MemProfData.CallStackData, MemProfFrameIndexes, FrameHistogram);

   uint64_t RecordPayloadOffset = OS.tell();
   uint64_t RecordTableOffset =

diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 620e2e2..8e30537 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -486,7 +486,8 @@ LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack(
 void CallStackRadixTreeBuilder::build(
     llvm::MapVector> &&MemProfCallStackData,
-    const llvm::DenseMap &MemProfFrameIndexes) {
+    const llvm::DenseMap &MemProfFrameIndexes,
+    llvm::DenseMap &FrameHistogram) {
   // Take the vector portion of MemProfCallStackData. The vector is exactly
   // what we need to sort. Also, we no longer need its lookup capability.
   llvm::SmallVector CallStacks = MemProfCallStackData.takeVector();
@@ -498,14 +499,56 @@ void CallStackRadixTreeBuilder::build(
     return;
   }

-  // Sort the list of call stacks in the dictionary order to maximize the length
-  // of the common prefix between two adjacent call stacks.
+  // Sorting the list of call stacks in the dictionary order is sufficient to
+  // maximize the length of the common prefix between two adjacent call stacks
+  // and thus minimize the length of RadixArray. However, we go one step
+  // further and try to reduce the number of times we follow pointers to parents
+  // during deserialization. Consider a poorly encoded radix tree:
+  //
+  //   CallStackId 1:  f1 -> f2 -> f3
+  //                    |
+  //   CallStackId 2:   +--- f4 -> f5
+  //                           |
+  //   CallStackId 3:          +--> f6
+  //
+  // Here, f2 and f4 appear once and twice, respectively, in the call stacks.
+  // Once we encode CallStackId 1 into RadixArray, every other call stack with
+  // common prefix f1 ends up pointing to CallStackId 1. Since CallStackId 3
+  // shares "f1 f4" with CallStackId 2, CallStackId 3 needs to follow pointers to
+  // parents twice.
+  //
+  // We try to alleviate the situation by sorting the list of call stacks by
+  // comparing the popularity of frames rather than the integer values of
+  // FrameIds. In the example above, f4 is more popular than f2, so we sort the
+  // call stacks and encode them as:
+  //
+  //   CallStackId 2:  f1 -- f4 -> f5
+  //                    |     |
+  //   CallStackId 3:   |     +--> f6
+  //                    |
+  //   CallStackId 1:   +--> f2 -> f3
+  //
+  // Notice that CallStackId 3 follows a pointer to a parent only once.
+ // + // All this is a quick-n-dirty trick to reduce the number of jumps. The + // proper way would be to compute the weight of each radix tree node -- how + // many call stacks use a given radix tree node, and encode a radix tree from + // the heaviest node first. We do not do so because that's a lot of work. llvm::sort(CallStacks, [&](const CSIdPair &L, const CSIdPair &R) { // Call stacks are stored from leaf to root. Perform comparisons from the // root. return std::lexicographical_compare( L.second.rbegin(), L.second.rend(), R.second.rbegin(), R.second.rend(), - [&](FrameId F1, FrameId F2) { return F1 < F2; }); + [&](FrameId F1, FrameId F2) { + uint64_t H1 = FrameHistogram[F1].Count; + uint64_t H2 = FrameHistogram[F2].Count; + // Popular frames should come later because we encode call stacks from + // the last one in the list. + if (H1 != H2) + return H1 < H2; + // For sort stability. + return F1 < F2; + }); }); // Reserve some reasonable amount of storage. @@ -569,6 +612,22 @@ void CallStackRadixTreeBuilder::build( V = RadixArray.size() - 1 - V; } +llvm::DenseMap +computeFrameHistogram(llvm::MapVector> + &MemProfCallStackData) { + llvm::DenseMap Histogram; + + for (const auto &KV : MemProfCallStackData) { + const auto &CS = KV.second; + for (unsigned I = 0, E = CS.size(); I != E; ++I) { + auto &S = Histogram[CS[I]]; + ++S.Count; + S.PositionSum += I; + } + } + return Histogram; +} + void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record) { for (const auto &AS : Record.AllocSites) { assert(AS.CSId == hashCallStack(AS.CallStack)); diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index 2642120..15eb59e 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -667,8 +667,12 @@ TEST(MemProf, MissingFrameId) { TEST(MemProf, RadixTreeBuilderEmpty) { llvm::DenseMap MemProfFrameIndexes; llvm::MapVector> MemProfCallStackData; + llvm::DenseMap + FrameHistogram = + llvm::memprof::computeFrameHistogram(MemProfCallStackData); llvm::memprof::CallStackRadixTreeBuilder Builder; - Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes); + Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes, + FrameHistogram); ASSERT_THAT(Builder.getRadixArray(), testing::IsEmpty()); const auto Mappings = Builder.takeCallStackPos(); ASSERT_THAT(Mappings, testing::IsEmpty()); @@ -681,8 +685,12 @@ TEST(MemProf, RadixTreeBuilderOne) { llvm::SmallVector CS1 = {13, 12, 11}; llvm::MapVector> MemProfCallStackData; MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS1), CS1}); + llvm::DenseMap + FrameHistogram = + llvm::memprof::computeFrameHistogram(MemProfCallStackData); llvm::memprof::CallStackRadixTreeBuilder Builder; - Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes); + Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes, + FrameHistogram); EXPECT_THAT(Builder.getRadixArray(), testing::ElementsAreArray({ 3U, // Size of CS1, 3U, // MemProfFrameIndexes[13] @@ -704,8 +712,12 @@ TEST(MemProf, RadixTreeBuilderTwo) { llvm::MapVector> MemProfCallStackData; MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS1), CS1}); MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS2), CS2}); + llvm::DenseMap + FrameHistogram = + llvm::memprof::computeFrameHistogram(MemProfCallStackData); llvm::memprof::CallStackRadixTreeBuilder Builder; - Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes); + 
Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes, + FrameHistogram); EXPECT_THAT(Builder.getRadixArray(), testing::ElementsAreArray({ 2U, // Size of CS1 @@ -738,8 +750,12 @@ TEST(MemProf, RadixTreeBuilderSuccessiveJumps) { MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS2), CS2}); MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS3), CS3}); MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS4), CS4}); + llvm::DenseMap + FrameHistogram = + llvm::memprof::computeFrameHistogram(MemProfCallStackData); llvm::memprof::CallStackRadixTreeBuilder Builder; - Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes); + Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes, + FrameHistogram); EXPECT_THAT(Builder.getRadixArray(), testing::ElementsAreArray({ 4U, // Size of CS1 -- cgit v1.1 From 96af11494158c38dafb64ffeaec3f371f37f0eb4 Mon Sep 17 00:00:00 2001 From: csstormq Date: Sat, 8 Jun 2024 08:38:27 +0800 Subject: [InstCombine] Preserve the nsw/nuw flags for (X | Op01C) + Op1C --> X + (Op01C + Op1C) (#94586) This patch simplifies `sdiv` to `udiv` by preserving the `nsw` flag for `(X | Op01C) + Op1C --> X + (Op01C + Op1C)` if the sum of `Op01C` and `Op1C` will not overflow, and preserves the `nuw` flag unconditionally. Alive2 Proofs (provided by @nikic): https://alive2.llvm.org/ce/z/nrdCZT, https://alive2.llvm.org/ce/z/YnJHnH --- .../Transforms/InstCombine/InstCombineAddSub.cpp | 10 ++++-- llvm/test/Transforms/InstCombine/add.ll | 40 ++++++++++++++++++++++ .../Transforms/InstCombine/sadd-with-overflow.ll | 2 +- .../PhaseOrdering/AArch64/matrix-extract-insert.ll | 18 +++++----- 4 files changed, 58 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 8205b49..0a73c58 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -905,8 +905,14 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) { // (X | Op01C) + Op1C --> X + (Op01C + Op1C) iff the `or` is actually an `add` Constant *Op01C; - if (match(Op0, m_DisjointOr(m_Value(X), m_ImmConstant(Op01C)))) - return BinaryOperator::CreateAdd(X, ConstantExpr::getAdd(Op01C, Op1C)); + if (match(Op0, m_DisjointOr(m_Value(X), m_ImmConstant(Op01C)))) { + BinaryOperator *NewAdd = + BinaryOperator::CreateAdd(X, ConstantExpr::getAdd(Op01C, Op1C)); + NewAdd->setHasNoSignedWrap(Add.hasNoSignedWrap() && + willNotOverflowSignedAdd(Op01C, Op1C, Add)); + NewAdd->setHasNoUnsignedWrap(Add.hasNoUnsignedWrap()); + return NewAdd; + } // (X | C2) + C --> (X | C2) ^ C2 iff (C2 == -C) const APInt *C2; diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll index 25087fe..239e146 100644 --- a/llvm/test/Transforms/InstCombine/add.ll +++ b/llvm/test/Transforms/InstCombine/add.ll @@ -1510,6 +1510,46 @@ define i8 @add_like_or_t2_extrause(i8 %x) { %r = add i8 %i1, 42 ret i8 %r } +define i8 @fold_add_constant_preserve_nsw(i8 %x) { +; CHECK-LABEL: @fold_add_constant_preserve_nsw( +; CHECK-NEXT: [[ADD:%.*]] = add nsw i8 [[X:%.*]], -120 +; CHECK-NEXT: ret i8 [[ADD]] +; + %or = or disjoint i8 %x, -128 + %add = add nsw i8 %or, 8 + ret i8 %add +} +define i8 @fold_add_constant_no_nsw(i8 %x) { +; CHECK-LABEL: @fold_add_constant_no_nsw( +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X:%.*]], 120 +; CHECK-NEXT: ret i8 [[ADD]] +; + %or = or disjoint i8 %x, -128 + %add = add nsw i8 %or, -8 + ret i8 
%add +} +define i8 @fold_add_constant_preserve_nuw(i8 %x) { +; CHECK-LABEL: @fold_add_constant_preserve_nuw( +; CHECK-NEXT: [[ADD:%.*]] = add nuw i8 [[X:%.*]], -116 +; CHECK-NEXT: ret i8 [[ADD]] +; + %or = or disjoint i8 %x, 128 + %add = add nuw i8 %or, 12 + ret i8 %add +} +define i32 @sdiv_to_udiv(i32 %arg0, i32 %arg1) { +; CHECK-LABEL: @sdiv_to_udiv( +; CHECK-NEXT: [[T0:%.*]] = shl nuw nsw i32 [[ARG0:%.*]], 8 +; CHECK-NEXT: [[T2:%.*]] = add nuw nsw i32 [[T0]], 6242049 +; CHECK-NEXT: [[T3:%.*]] = udiv i32 [[T2]], 192 +; CHECK-NEXT: ret i32 [[T3]] +; + %t0 = shl nuw nsw i32 %arg0, 8 + %t1 = or disjoint i32 %t0, 1 + %t2 = add nuw nsw i32 %t1, 6242048 + %t3 = sdiv i32 %t2, 192 + ret i32 %t3 +} define i8 @add_like_or_disjoint(i8 %x) { ; CHECK-LABEL: @add_like_or_disjoint( diff --git a/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll b/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll index 729ca03..e4dd2d1 100644 --- a/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll +++ b/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll @@ -125,7 +125,7 @@ define { i32, i1 } @fold_sub_simple(i32 %x) { define { i32, i1 } @fold_with_distjoin_or(i32 %x) { ; CHECK-LABEL: @fold_with_distjoin_or( -; CHECK-NEXT: [[B:%.*]] = add i32 [[X:%.*]], 6 +; CHECK-NEXT: [[B:%.*]] = add nsw i32 [[X:%.*]], 6 ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 poison, i1 false }, i32 [[B]], 0 ; CHECK-NEXT: ret { i32, i1 } [[TMP1]] ; diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll index 5cbf50e..db16413 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll @@ -182,11 +182,11 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK: vector.body.1: ; CHECK-NEXT: [[INDEX_1:%.*]] = phi i64 [ 0, [[VECTOR_PH_1]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY_1]] ] ; CHECK-NEXT: [[TMP33:%.*]] = add nuw nsw i64 [[INDEX_1]], 15 -; CHECK-NEXT: [[TMP34:%.*]] = add i64 [[INDEX_1]], 16 +; CHECK-NEXT: [[TMP34:%.*]] = add nuw nsw i64 [[INDEX_1]], 16 ; CHECK-NEXT: [[TMP35:%.*]] = insertelement <2 x i64> poison, i64 [[TMP33]], i64 0 ; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i64> [[TMP35]], i64 [[TMP34]], i64 1 -; CHECK-NEXT: [[TMP37:%.*]] = add i64 [[INDEX_1]], 17 -; CHECK-NEXT: [[TMP38:%.*]] = add i64 [[INDEX_1]], 18 +; CHECK-NEXT: [[TMP37:%.*]] = add nuw nsw i64 [[INDEX_1]], 17 +; CHECK-NEXT: [[TMP38:%.*]] = add nuw nsw i64 [[INDEX_1]], 18 ; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x i64> poison, i64 [[TMP37]], i64 0 ; CHECK-NEXT: [[TMP40:%.*]] = insertelement <2 x i64> [[TMP39]], i64 [[TMP38]], i64 1 ; CHECK-NEXT: [[TMP41:%.*]] = icmp ult <2 x i64> [[TMP36]], @@ -259,11 +259,11 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK: vector.body.2: ; CHECK-NEXT: [[INDEX_2:%.*]] = phi i64 [ 0, [[VECTOR_PH_2]] ], [ [[INDEX_NEXT_2:%.*]], [[VECTOR_BODY_2]] ] ; CHECK-NEXT: [[TMP64:%.*]] = add nuw nsw i64 [[INDEX_2]], 30 -; CHECK-NEXT: [[TMP65:%.*]] = add i64 [[INDEX_2]], 31 +; CHECK-NEXT: [[TMP65:%.*]] = add nuw nsw i64 [[INDEX_2]], 31 ; CHECK-NEXT: [[TMP66:%.*]] = insertelement <2 x i64> poison, i64 [[TMP64]], i64 0 ; CHECK-NEXT: [[TMP67:%.*]] = insertelement <2 x i64> [[TMP66]], i64 [[TMP65]], i64 1 -; CHECK-NEXT: [[TMP68:%.*]] = add i64 [[INDEX_2]], 32 -; CHECK-NEXT: [[TMP69:%.*]] = add i64 [[INDEX_2]], 33 +; CHECK-NEXT: [[TMP68:%.*]] = add 
nuw nsw i64 [[INDEX_2]], 32
+; CHECK-NEXT:    [[TMP69:%.*]] = add nuw nsw i64 [[INDEX_2]], 33
 ; CHECK-NEXT:    [[TMP70:%.*]] = insertelement <2 x i64> poison, i64 [[TMP68]], i64 0
 ; CHECK-NEXT:    [[TMP71:%.*]] = insertelement <2 x i64> [[TMP70]], i64 [[TMP69]], i64 1
 ; CHECK-NEXT:    [[TMP72:%.*]] = icmp ult <2 x i64> [[TMP67]],
@@ -336,11 +336,11 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK:       vector.body.3:
 ; CHECK-NEXT:    [[INDEX_3:%.*]] = phi i64 [ 0, [[VECTOR_PH_3]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY_3]] ]
 ; CHECK-NEXT:    [[TMP95:%.*]] = add nuw nsw i64 [[INDEX_3]], 45
-; CHECK-NEXT:    [[TMP96:%.*]] = add i64 [[INDEX_3]], 46
+; CHECK-NEXT:    [[TMP96:%.*]] = add nuw nsw i64 [[INDEX_3]], 46
 ; CHECK-NEXT:    [[TMP97:%.*]] = insertelement <2 x i64> poison, i64 [[TMP95]], i64 0
 ; CHECK-NEXT:    [[TMP98:%.*]] = insertelement <2 x i64> [[TMP97]], i64 [[TMP96]], i64 1
-; CHECK-NEXT:    [[TMP99:%.*]] = add i64 [[INDEX_3]], 47
-; CHECK-NEXT:    [[TMP100:%.*]] = add i64 [[INDEX_3]], 48
+; CHECK-NEXT:    [[TMP99:%.*]] = add nuw nsw i64 [[INDEX_3]], 47
+; CHECK-NEXT:    [[TMP100:%.*]] = add nuw nsw i64 [[INDEX_3]], 48
 ; CHECK-NEXT:    [[TMP101:%.*]] = insertelement <2 x i64> poison, i64 [[TMP99]], i64 0
 ; CHECK-NEXT:    [[TMP102:%.*]] = insertelement <2 x i64> [[TMP101]], i64 [[TMP100]], i64 1
 ; CHECK-NEXT:    [[TMP103:%.*]] = icmp ult <2 x i64> [[TMP98]],
--
cgit v1.1

From 608fb463d20e70668cf4dd3f0c58bd3de91c42eb Mon Sep 17 00:00:00 2001
From: Paul Kirth
Date: Fri, 7 Jun 2024 17:56:35 -0700
Subject: [lld] Discard SHT_LLVM_LTO sections in relocatable links (#92825)

So long as ld -r links using bitcode always result in an ELF object, and
not a merged bitcode object, the output from a relocatable link using
FatLTO objects should not have a .llvm.lto section. Prior to this, using
the object code sections would cause the bitcode section in the output
of a relocatable link to be corrupted, by concatenating all the
.llvm.lto sections together.

This patch discards SHT_LLVM_LTO sections when not using
--fat-lto-objects, so that the relocatable ELF output won't contain
invalid bitcode.
---
 lld/ELF/InputFiles.cpp          | 10 ++++++++++
 lld/test/ELF/fatlto/fatlto.test | 17 ++++------------- 
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 9021bbd..e6a0a5b 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -844,6 +844,16 @@ void ObjFile::initializeSections(bool ignoreComdats,
       this->sections[i] = createInputSection(i, sec,
                                              check(obj.getSectionName(sec, shstrtab)));
       break;
+    case SHT_LLVM_LTO:
+      // Discard .llvm.lto in a relocatable link that does not use the bitcode.
+      // The concatenated output does not properly reflect the linking
+      // semantics. In addition, since we do not use the bitcode wrapper format,
+      // the concatenated raw bitcode would be invalid.
+      if (config->relocatable && !config->fatLTOObjects) {
+        sections[i] = &InputSection::discarded;
+        break;
+      }
+      [[fallthrough]];
     default:
       this->sections[i] = createInputSection(i, sec,
                                              check(obj.getSectionName(sec, shstrtab)));

diff --git a/lld/test/ELF/fatlto/fatlto.test b/lld/test/ELF/fatlto/fatlto.test
index ed13708..7ec094d 100644
--- a/lld/test/ELF/fatlto/fatlto.test
+++ b/lld/test/ELF/fatlto/fatlto.test
@@ -8,7 +8,6 @@
 ; RUN: opt < a-LTO.ll --module-summary -o a-fatLTO.bc
 ; RUN: llvm-objcopy --add-section=.llvm.lto=a-fatLTO.bc --set-section-flags=.llvm.lto=exclude --set-section-type=.llvm.lto=0x6fff4c0c a-fatLTO.o
-
 ; RUN: llc main-LTO.ll --filetype=obj -o main-fatLTO.o --relocation-model=pic
 ; RUN: opt < main-LTO.ll --module-summary -o main-fatLTO.bc
 ; RUN: llvm-objcopy --add-section=.llvm.lto=main-fatLTO.bc --set-section-flags=.llvm.lto=exclude --set-section-type=.llvm.lto=0x6fff4c0c main-fatLTO.o
@@ -17,11 +16,6 @@
 ; RUN: llvm-readelf -S main-fatLTO.o | FileCheck --check-prefix=HAS_LLVM_LTO %s

 ;; Make sure that the section flags are set correctly
-; HA_LLVM_LTO: Name: .llvm.lto
-; HA_LLVM_LTO-NEXT: Type: SHT_LLVM_LTO
-; HA_LLVM_LTO-NEXT: Flags
-; HA_LLVM_LTO-NEXT: SHF_EXCLUDE
 ; HAS_LLVM_LTO: Name Type Address Off Size ES Flg Lk Inf Al
 ; HAS_LLVM_LTO: .llvm.lto LLVM_LTO {{.*}} 00 WE 0 0 1
@@ -64,16 +58,13 @@
 ; RUN: ld.lld -o foo-fatLTO.archive a.a main-LTO.bc --fat-lto-objects
 ; RUN: cmp foo-fatLTO.archive foo-LTO

-;; Test FatLTO works with relocatable links using PIC objects
-;; Currently, with PIC relocatable links, FatLTO sections are treated as
-;; orphan sections and incorrectly concatenated together. This test verifies
-;; the current behavior, but should be fixed to either merge those sections
-;; correctly, or to drop them altogether.
+;; Test FatLTO works with relocatable links using PIC objects, and that
+;; SHT_LLVM_LTO sections are discarded.
 ; RUN: llvm-ar rcs fatLTO-pic.a a-fatLTO.o main-fatLTO.o
 ; RUN: llvm-readelf -S fatLTO-pic.a | FileCheck --check-prefix=HAS_LLVM_LTO %s

-; RUN: ld.lld --whole-archive fatLTO-pic.a -r -o fatLTO-pic-reolcatable.o
-; RUN: llvm-readelf -S fatLTO-pic-reolcatable.o | FileCheck --check-prefix=HAS_LLVM_LTO %s
+; RUN: ld.lld --whole-archive fatLTO-pic.a -r -o fatLTO-pic-relocatable.o
+; RUN: llvm-readelf -S fatLTO-pic-relocatable.o | FileCheck --check-prefix=CHECK-NON-LTO-TARGET %s

 ;--- a-LTO.ll
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
--
cgit v1.1

From 4c28844e3b1c0fd71c55e5954d21c6455d93068a Mon Sep 17 00:00:00 2001
From: Kazu Hirata
Date: Fri, 7 Jun 2024 18:18:31 -0700
Subject: [ProfileData] Use default member initialization (NFC) (#94817)

While we are at it, this patch changes the type of ValueCounts to
std::array so that we can use std::array::fill.

Identified with modernize-use-default-member-init.
---
 llvm/include/llvm/ProfileData/InstrProf.h | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 15b9eb6..61bf949 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -737,15 +737,14 @@ GlobalVariable *InstrProfSymtab::getGlobalVariable(uint64_t MD5Hash) {
 // To store the sums of profile count values, or the percentage of
 // the sums of the total count values.
struct CountSumOrPercent { - uint64_t NumEntries; - double CountSum; - double ValueCounts[IPVK_Last - IPVK_First + 1]; - CountSumOrPercent() : NumEntries(0), CountSum(0.0f), ValueCounts() {} + uint64_t NumEntries = 0; + double CountSum = 0.0f; + std::array ValueCounts = {}; + CountSumOrPercent() = default; void reset() { NumEntries = 0; CountSum = 0.0f; - for (double &VC : ValueCounts) - VC = 0.0f; + ValueCounts.fill(0.0f); } }; @@ -761,15 +760,13 @@ struct OverlapStats { CountSumOrPercent Mismatch; CountSumOrPercent Unique; OverlapStatsLevel Level; - const std::string *BaseFilename; - const std::string *TestFilename; + const std::string *BaseFilename = nullptr; + const std::string *TestFilename = nullptr; StringRef FuncName; - uint64_t FuncHash; - bool Valid; + uint64_t FuncHash = 0; + bool Valid = false; - OverlapStats(OverlapStatsLevel L = ProgramLevel) - : Level(L), BaseFilename(nullptr), TestFilename(nullptr), FuncHash(0), - Valid(false) {} + OverlapStats(OverlapStatsLevel L = ProgramLevel) : Level(L) {} void dump(raw_fd_ostream &OS) const; -- cgit v1.1 From 4cff8ef86fffab8e117aea580977d759754c9910 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 7 Jun 2024 18:25:49 -0700 Subject: [ProfileData] Use DenseMap::lookup (NFC) (#94818) --- llvm/include/llvm/ProfileData/InstrProf.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 61bf949..d6831ee 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -729,9 +729,7 @@ Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) { } GlobalVariable *InstrProfSymtab::getGlobalVariable(uint64_t MD5Hash) { - if (auto Iter = MD5VTableMap.find(MD5Hash); Iter != MD5VTableMap.end()) - return Iter->second; - return nullptr; + return MD5VTableMap.lookup(MD5Hash); } // To store the sums of profile count values, or the percentage of -- cgit v1.1 From 18c67bf2658284431331da21c2ea6fe562434b1d Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 7 Jun 2024 21:24:25 -0400 Subject: [gn build] Port 37e309f16354 (AArch64 loopvectorize) --- llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn | 2 +- llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn index 879b7f0..2ffe83d 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn @@ -103,6 +103,7 @@ static_library("LLVMAArch64CodeGen") { "//llvm/lib/Transforms/CFGuard", "//llvm/lib/Transforms/Scalar", "//llvm/lib/Transforms/Utils", + "//llvm/lib/Transforms/Vectorize", ] include_dirs = [ "." 
] sources = [ @@ -130,7 +131,6 @@ static_library("LLVMAArch64CodeGen") { "AArch64ISelLowering.cpp", "AArch64InstrInfo.cpp", "AArch64LoadStoreOptimizer.cpp", - "AArch64LoopIdiomTransform.cpp", "AArch64LowerHomogeneousPrologEpilog.cpp", "AArch64MCInstLower.cpp", "AArch64MIPeepholeOpt.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn index 044b781..92337a5 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn @@ -8,6 +8,7 @@ static_library("Vectorize") { ] sources = [ "LoadStoreVectorizer.cpp", + "LoopIdiomVectorize.cpp", "LoopVectorizationLegality.cpp", "LoopVectorize.cpp", "SLPVectorizer.cpp", -- cgit v1.1 From 4e0ff05460c0b898bcc07fba8ad1e8de91706e6f Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 7 Jun 2024 18:32:58 -0700 Subject: [memprof] Remove extraneous memprof:: (NFC) (#94825) --- llvm/include/llvm/ProfileData/MemProf.h | 20 +++++++++----------- llvm/include/llvm/ProfileData/MemProfReader.h | 4 ++-- llvm/lib/ProfileData/MemProf.cpp | 8 ++++---- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 8f5ba9c..a650149 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -439,7 +439,7 @@ struct IndexedMemProfRecord { // on the schema provided in \p Schema. void serialize(const MemProfSchema &Schema, raw_ostream &OS, IndexedVersion Version, - llvm::DenseMap + llvm::DenseMap *MemProfCallStackIndexes = nullptr) const; // Deserializes memprof records from the Buffer. @@ -579,15 +579,14 @@ private: IndexedVersion Version; // Mappings from CallStackId to the indexes into the call stack array. - llvm::DenseMap - *MemProfCallStackIndexes; + llvm::DenseMap *MemProfCallStackIndexes; public: // We do not support the default constructor, which does not set Version. RecordWriterTrait() = delete; - RecordWriterTrait(const MemProfSchema *Schema, IndexedVersion V, - llvm::DenseMap - *MemProfCallStackIndexes) + RecordWriterTrait( + const MemProfSchema *Schema, IndexedVersion V, + llvm::DenseMap *MemProfCallStackIndexes) : Schema(Schema), Version(V), MemProfCallStackIndexes(MemProfCallStackIndexes) {} @@ -1039,11 +1038,10 @@ public: CallStackRadixTreeBuilder() = default; // Build a radix tree array. 
-  void
-  build(llvm::MapVector>
-            &&MemProfCallStackData,
-        const llvm::DenseMap &MemProfFrameIndexes,
-        llvm::DenseMap &FrameHistogram);
+  void build(llvm::MapVector>
+                 &&MemProfCallStackData,
+             const llvm::DenseMap &MemProfFrameIndexes,
+             llvm::DenseMap &FrameHistogram);

   const std::vector &getRadixArray() const { return RadixArray; }

diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h
index b42e4f5..f028682 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -76,8 +76,8 @@ public:
     Callback = std::bind(&MemProfReader::idToFrame, this, std::placeholders::_1);

-    memprof::CallStackIdConverter CSIdConv(
-        CSIdToCallStack, Callback);
+    CallStackIdConverter CSIdConv(CSIdToCallStack,
+                                  Callback);

     const IndexedMemProfRecord &IndexedRecord = Iter->second;
     GuidRecord = {

diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 8e30537..4ca8687 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -343,15 +343,15 @@ MemProfRecord IndexedMemProfRecord::toMemProfRecord(
   MemProfRecord Record;

   Record.AllocSites.reserve(AllocSites.size());
-  for (const memprof::IndexedAllocationInfo &IndexedAI : AllocSites) {
-    memprof::AllocationInfo AI;
+  for (const IndexedAllocationInfo &IndexedAI : AllocSites) {
+    AllocationInfo AI;
     AI.Info = IndexedAI.Info;
     AI.CallStack = Callback(IndexedAI.CSId);
     Record.AllocSites.push_back(std::move(AI));
   }

   Record.CallSites.reserve(CallSiteIds.size());
-  for (memprof::CallStackId CSId : CallSiteIds)
+  for (CallStackId CSId : CallSiteIds)
     Record.CallSites.push_back(Callback(CSId));

   return Record;
@@ -487,7 +487,7 @@ void CallStackRadixTreeBuilder::build(
     llvm::MapVector> &&MemProfCallStackData,
     const llvm::DenseMap &MemProfFrameIndexes,
-    llvm::DenseMap &FrameHistogram) {
+    llvm::DenseMap &FrameHistogram) {
   // Take the vector portion of MemProfCallStackData. The vector is exactly
   // what we need to sort. Also, we no longer need its lookup capability.
   llvm::SmallVector CallStacks = MemProfCallStackData.takeVector();
--
cgit v1.1

From 4d95850d052336a785651030eafa0b24651801a0 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot
Date: Sat, 8 Jun 2024 01:33:19 +0000
Subject: [gn build] Port c4f83a004bf3

---
 llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn
index 36957f5..0dc5efc 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn
@@ -55,5 +55,6 @@ static_library("misc") {
     "UnusedParametersCheck.cpp",
     "UnusedUsingDeclsCheck.cpp",
     "UseAnonymousNamespaceCheck.cpp",
+    "UseInternalLinkageCheck.cpp",
   ]
 }
--
cgit v1.1

From 5422b5f0287d6f0920abf1b868985575885b4d5b Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 7 Jun 2024 17:28:59 -0700
Subject: [RISCV] Rename VPseudoVWALU_VV_VX_VI to VPseudoVWSLL. NFC

The scheduler class name is hardcoded in the class, so it's not a
general class.
--- llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index d091077..957d295 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ -452,16 +452,16 @@ multiclass VPseudoVCPOP { } } -multiclass VPseudoVWALU_VV_VX_VI { +multiclass VPseudoVWSLL { foreach m = MxListW in { defvar mx = m.MX; defm "" : VPseudoBinaryW_VV, SchedBinary<"WriteVWSLLV", "ReadVWSLLV", "ReadVWSLLV", mx, forceMergeOpRead=true>; - defm "" : VPseudoBinaryW_VX, + defm "" : VPseudoBinaryW_VX, SchedBinary<"WriteVWSLLX", "ReadVWSLLV", "ReadVWSLLX", mx, forceMergeOpRead=true>; - defm "" : VPseudoBinaryW_VI, + defm "" : VPseudoBinaryW_VI, SchedUnary<"WriteVWSLLI", "ReadVWSLLV", mx, forceMergeOpRead=true>; } @@ -525,7 +525,7 @@ let Predicates = [HasStdExtZvbb] in { defm PseudoVCLZ : VPseudoVCLZ; defm PseudoVCTZ : VPseudoVCTZ; defm PseudoVCPOP : VPseudoVCPOP; - defm PseudoVWSLL : VPseudoVWALU_VV_VX_VI; + defm PseudoVWSLL : VPseudoVWSLL; } // Predicates = [HasStdExtZvbb] let Predicates = [HasStdExtZvbc] in { -- cgit v1.1 From 5fc1b82277dc4dd4ef133432ac8d8b1fa300d7c5 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 7 Jun 2024 17:34:11 -0700 Subject: [RISCV] Refactor VPseudoVROL and VPseudoVROR multiclasses to use inheritance. NFC VPseudoVROR can inherit from VPseudoVROL. Adjust the names to VPseudoVROT_VV_VX and VPseudoVROT_VV_VX_VI. --- llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index 957d295..fd48233 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ -494,7 +494,7 @@ multiclass VPseudoVREV8 { } } -multiclass VPseudoVROL { +multiclass VPseudoVROT_VV_VX { foreach m = MxList in { defm "" : VPseudoBinaryV_VV, SchedBinary<"WriteVRotV", "ReadVRotV", "ReadVRotV", m.MX, @@ -505,18 +505,12 @@ multiclass VPseudoVROL { } } -multiclass VPseudoVROR { - defvar Constraint = ""; +multiclass VPseudoVROT_VV_VX_VI + : VPseudoVROT_VV_VX { foreach m = MxList in { - defvar mx = m.MX; - defm "" : VPseudoBinaryV_VV, - SchedBinary<"WriteVRotV", "ReadVRotV", "ReadVRotV", mx, - forceMergeOpRead=true>; - defm "" : VPseudoBinaryV_VX, - SchedBinary<"WriteVRotX", "ReadVRotV", "ReadVRotX", mx, - forceMergeOpRead=true>; - defm "" : VPseudoBinaryV_VI, - SchedUnary<"WriteVRotI", "ReadVRotV", mx, forceMergeOpRead=true>; + defm "" : VPseudoBinaryV_VI, + SchedUnary<"WriteVRotI", "ReadVRotV", m.MX, + forceMergeOpRead=true>; } } @@ -537,8 +531,8 @@ let Predicates = [HasStdExtZvkb] in { defm PseudoVANDN : VPseudoVANDN; defm PseudoVBREV8 : VPseudoVBREV8; defm PseudoVREV8 : VPseudoVREV8; - defm PseudoVROL : VPseudoVROL; - defm PseudoVROR : VPseudoVROR; + defm PseudoVROL : VPseudoVROT_VV_VX; + defm PseudoVROR : VPseudoVROT_VV_VX_VI; } // Predicates = [HasStdExtZvkb] let Predicates = [HasStdExtZvkg] in { -- cgit v1.1 From 7d203b1cdb004a609c25d676cdb06c2e9eff3a59 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 7 Jun 2024 17:49:50 -0700 Subject: [RISCV] Rename VPseudoBinaryNoMaskTU->VPseudoBinaryNoMaskPolicy. NFC These pseudoinstructions have a policy operand so calling them TU is confusing. 
--- llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 18 ++++++++-------- llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td | 28 ++++++++++++------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 818073d..ef52f57 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -1234,11 +1234,11 @@ class VPseudoBinaryNoMask : +class VPseudoBinaryNoMaskPolicy : Pseudo<(outs RetClass:$rd), (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, @@ -2138,8 +2138,8 @@ multiclass VPseudoBinary { let VLMul = MInfo.value, SEW=sew, isCommutable = Commutable in { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); - def suffix : VPseudoBinaryNoMaskTU; + def suffix : VPseudoBinaryNoMaskPolicy; def suffix # "_MASK" : VPseudoBinaryMaskPolicy, RISCVMaskedPseudo; @@ -2197,8 +2197,8 @@ multiclass VPseudoBinaryEmul { let VLMul = lmul.value, SEW=sew in { defvar suffix = !if(sew, "_" # lmul.MX # "_E" # sew, "_" # lmul.MX); - def suffix # "_" # emul.MX : VPseudoBinaryNoMaskTU; + def suffix # "_" # emul.MX : VPseudoBinaryNoMaskPolicy; def suffix # "_" # emul.MX # "_MASK" : VPseudoBinaryMaskPolicy, RISCVMaskedPseudo; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index fd48233..82b3b61 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ -247,23 +247,23 @@ class VPseudoTernaryNoMask_Zvk(PseudoToVInst.VInst); } -multiclass VPseudoBinaryNoMaskTU_Zvk { +multiclass VPseudoBinaryNoMaskPolicy_Zvk { let VLMul = MInfo.value, SEW=sew in { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); - def suffix : VPseudoBinaryNoMaskTU; + def suffix : VPseudoBinaryNoMaskPolicy; } } multiclass VPseudoTernaryNoMask_Zvk { + VReg Op1Class, + DAGOperand Op2Class, + LMULInfo MInfo> { let VLMul = MInfo.value in def "_" # MInfo.MX : VPseudoTernaryNoMask_Zvk; } @@ -349,7 +349,7 @@ multiclass VPseudoVSHA2MS { multiclass VPseudoVAESKF1 { foreach m = MxListVF4 in { defvar mx = m.MX; - defm _VI : VPseudoBinaryNoMaskTU_Zvk, + defm _VI : VPseudoBinaryNoMaskPolicy_Zvk, SchedBinary<"WriteVAESKF1V", "ReadVAESKF1V", "ReadVAESKF1V", mx, forceMergeOpRead=true>; } @@ -384,7 +384,7 @@ multiclass VPseudoVSM3C { multiclass VPseudoVSM4K { foreach m = MxListVF4 in { defvar mx = m.MX; - defm _VI : VPseudoBinaryNoMaskTU_Zvk, + defm _VI : VPseudoBinaryNoMaskPolicy_Zvk, SchedBinary<"WriteVSM4KV", "ReadVSM4KV", "ReadVSM4KV", mx, forceMergeOpRead=true>; } @@ -393,7 +393,7 @@ multiclass VPseudoVSM4K { multiclass VPseudoVSM3ME { foreach m = MxListVF4 in { defvar mx = m.MX; - defm _VV : VPseudoBinaryNoMaskTU_Zvk, + defm _VV : VPseudoBinaryNoMaskPolicy_Zvk, SchedBinary<"WriteVSM3MEV", "ReadVSM3MEV", "ReadVSM3MEV", mx, forceMergeOpRead=true>; } -- cgit v1.1 From 5e94163edb5ad763ff6a3f6c29935b9c6780060b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 7 Jun 2024 18:04:04 -0700 Subject: [RISCV] Rename VPatBinarySwapped to VPatBinaryMSwapped. NFC This class is most closely related to VPatBinaryM. 
--- llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 52 ++++++++++++------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index ef52f57..1a51481 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -4161,15 +4161,15 @@ class VPatBinaryNoMaskTURoundingMode; -// Same as above but source operands are swapped. -class VPatBinaryNoMaskSwapped : +// Same as VPatBinaryM but source operands are swapped. +class VPatBinaryMSwapped : Pat<(result_type (!cast(intrinsic_name) (op2_type op2_kind:$rs2), (op1_type op1_reg_class:$rs1), @@ -4248,7 +4248,7 @@ class VPatBinaryMaskTARoundingMode; -// Same as above but source operands are swapped. +// Same as VPatBinaryMask but source operands are swapped. class VPatBinaryMaskSwapped; } -multiclass VPatBinarySwapped { - def : VPatBinaryNoMaskSwapped; +multiclass VPatBinaryMSwapped { + def : VPatBinaryMSwapped; def : VPatBinaryMaskSwapped; @@ -5406,10 +5406,10 @@ multiclass VPatBinarySwappedM_VV vtilist> { foreach vti = vtilist in let Predicates = GetVTypePredicates.Predicates in - defm : VPatBinarySwapped; + defm : VPatBinaryMSwapped; } multiclass VPatBinaryM_VX Date: Fri, 7 Jun 2024 18:42:21 -0700 Subject: [RISCV] Flatten VPatBinaryW_VI_VWSLL and VPatBinaryW_VX_VWSLL into VPatBinaryW_VV_VX_VI_VWSLL. NFC --- llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td | 38 ++++++++++-------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index 82b3b61..98b5aee 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ -1031,39 +1031,27 @@ multiclass VPatBinaryV_VV_VX_VI_VROR, VPatBinaryV_VI; -multiclass VPatBinaryW_VI_VWSLL vtilist> { - foreach VtiToWti = vtilist in { - defvar Vti = VtiToWti.Vti; - defvar Wti = VtiToWti.Wti; - defm : VPatBinary; - } -} - -multiclass VPatBinaryW_VX_VWSLL vtilist> { +multiclass VPatBinaryW_VV_VX_VI_VWSLL vtilist> + : VPatBinaryW_VV { foreach VtiToWti = vtilist in { defvar Vti = VtiToWti.Vti; defvar Wti = VtiToWti.Wti; defvar kind = "V"#Vti.ScalarSuffix; let Predicates = !listconcat(GetVTypePredicates.Predicates, - GetVTypePredicates.Predicates) in - defm : VPatBinary; + GetVTypePredicates.Predicates) in { + defm : VPatBinary; + defm : VPatBinary; + } } } -multiclass VPatBinaryW_VV_VX_VI_VWSLL vtilist> - : VPatBinaryW_VV, - VPatBinaryW_VX_VWSLL, - VPatBinaryW_VI_VWSLL; - let Predicates = [HasStdExtZvbb] in { defm : VPatUnaryV_V<"int_riscv_vbrev", "PseudoVBREV", AllIntegerVectors>; defm : VPatUnaryV_V<"int_riscv_vclz", "PseudoVCLZ", AllIntegerVectors>; -- cgit v1.1 From 81671fe0e21e25f31b8100297f829744e66df4bd Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 7 Jun 2024 19:02:55 -0700 Subject: [workflows] Add post-commit job that periodically runs the clang static analyzer (#94106) This job will run once per day on the main branch, and for every commit on a release branch. It currently only builds llvm, but could add more sub-projects in the future. 
OpenSSF Best Practices recommends running a static analyzer on software before it is released: https://www.bestpractices.dev/en/criteria/0#0.static_analysis --- .github/workflows/ci-post-commit-analyzer-run.py | 34 +++++++++ .github/workflows/ci-post-commit-analyzer.yml | 95 ++++++++++++++++++++++++ 2 files changed, 129 insertions(+) create mode 100644 .github/workflows/ci-post-commit-analyzer-run.py create mode 100644 .github/workflows/ci-post-commit-analyzer.yml diff --git a/.github/workflows/ci-post-commit-analyzer-run.py b/.github/workflows/ci-post-commit-analyzer-run.py new file mode 100644 index 0000000..e5f52d3 --- /dev/null +++ b/.github/workflows/ci-post-commit-analyzer-run.py @@ -0,0 +1,34 @@ +import json +import multiprocessing +import os +import re +import subprocess +import sys + + +def run_analyzer(data): + os.chdir(data["directory"]) + command = ( + data["command"] + + f" --analyze --analyzer-output html -o analyzer-results -Xclang -analyzer-config -Xclang max-nodes=75000" + ) + print(command) + subprocess.run(command, shell=True, check=True) + + +def pool_error(e): + print("Error analyzing file:", e) + + +def main(): + db_path = sys.argv[1] + database = json.load(open(db_path)) + + with multiprocessing.Pool() as pool: + pool.map_async(run_analyzer, [k for k in database], error_callback=pool_error) + pool.close() + pool.join() + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/ci-post-commit-analyzer.yml b/.github/workflows/ci-post-commit-analyzer.yml new file mode 100644 index 0000000..d614dd0 --- /dev/null +++ b/.github/workflows/ci-post-commit-analyzer.yml @@ -0,0 +1,95 @@ +name: Post-Commit Static Analyzer + +permissions: + contents: read + +on: + push: + branches: + - 'release/**' + paths: + - 'clang/**' + - 'llvm/**' + - '.github/workflows/ci-post-commit-analyzer.yml' + pull_request: + types: + - opened + - synchronize + - reopened + - closed + paths: + - '.github/workflows/ci-post-commit-analyzer.yml' + - '.github/workflows/ci-post-commit-analyzer-run.py' + schedule: + - cron: '30 0 * * *' + +concurrency: + group: >- + llvm-project-${{ github.workflow }}-${{ github.event_name == 'pull_request' && + ( github.event.pull_request.number || github.ref) }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} + +jobs: + post-commit-analyzer: + if: >- + github.repository_owner == 'llvm' && + github.event.action != 'closed' + runs-on: ubuntu-22.04 + container: + image: 'ghcr.io/llvm/ci-ubuntu-22.04:latest' + env: + LLVM_VERSION: 18 + steps: + - name: Checkout Source + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Setup ccache + uses: hendrikmuhs/ccache-action@v1 + with: + # A full build of llvm, clang, lld, and lldb takes about 250MB + # of ccache space. There's not much reason to have more than this, + # because we usually won't need to save cache entries from older + # builds. Also, there is an overall 10GB cache limit, and each + # run creates a new cache entry so we want to ensure that we have + # enough cache space for all the tests to run at once and still + # fit under the 10 GB limit. 
+ # Default to 2G to workaround: https://github.com/hendrikmuhs/ccache-action/issues/174 + max-size: 2G + key: post-commit-analyzer + variant: sccache + + - name: Configure + run: | + cmake -B build -S llvm -G Ninja \ + -DLLVM_ENABLE_ASSERTIONS=ON \ + -DLLVM_ENABLE_PROJECTS=clang \ + -DLLVM_BUILD_LLVM_DYLIB=ON \ + -DLLVM_LINK_LLVM_DYLIB=ON \ + -DCMAKE_CXX_COMPILER=clang++ \ + -DCMAKE_C_COMPILER=clang \ + -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ + -DCMAKE_C_COMPILER_LAUNCHER=sccache \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + -DLLVM_INCLUDE_TESTS=OFF \ + -DCLANG_INCLUDE_TESTS=OFF \ + -DCMAKE_BUILD_TYPE=Release + + - name: Build + run: | + # FIXME: We need to build all the generated header files in order to be able to run + # the analyzer on every file. Building libLLVM and libclang is probably overkill for + # this, but it's better than building every target. + ninja -v -C build libLLVM.so libclang.so + + # Run the analyzer. + python3 .github/workflows/ci-post-commit-analyzer-run.py build/compile_commands.json + + scan-build --generate-index-only build/analyzer-results + + - name: Upload Results + uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 #v4.3.0 + if: always() + with: + name: analyzer-results + path: 'build/analyzer-results/*' + -- cgit v1.1 From c0a1214c0c91a9a8df1a5758feebd9f1b99c6242 Mon Sep 17 00:00:00 2001 From: Jie Fu Date: Sat, 8 Jun 2024 11:55:40 +0800 Subject: [mlir] Handle the newly-added "Reserved" FramePointerKind for 1a5239251ead73ee57f4e2f7fc93433ac7cf18b1 (NFC) /llvm-project/mlir/lib/Target/LLVMIR/ModuleImport.cpp:48: tools/mlir/include/mlir/Dialect/LLVMIR/LLVMConversionEnumsFromLLVM.inc:158:11: error: enumeration value 'Reserved' not handled in switch [-Werror,-Wswitch] switch (value) { ^~~~~ 1 error generated. --- mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td index a93964a..f8e8500 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td @@ -716,12 +716,15 @@ def FramePointerKindNonLeaf : LLVM_EnumAttrCase<"NonLeaf", "non-leaf", "NonLeaf", 1>; def FramePointerKindAll : LLVM_EnumAttrCase<"All", "all", "All", 2>; +def FramePointerKindReserved + : LLVM_EnumAttrCase<"Reserved", "reserved", "Reserved", 3>; def FramePointerKindEnum : LLVM_EnumAttr< "FramePointerKind", "::llvm::FramePointerKind", "LLVM FramePointerKind", - [FramePointerKindNone, FramePointerKindNonLeaf, FramePointerKindAll]> { + [FramePointerKindNone, FramePointerKindNonLeaf, + FramePointerKindAll, FramePointerKindReserved]> { let cppNamespace = "::mlir::LLVM::framePointerKind"; } -- cgit v1.1 From 221336c8e2b0d2d1979cdaf198ab0afcf930d795 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Fri, 7 Jun 2024 21:10:13 -0700 Subject: [dfsan] Fix release_shadow_space.c (#94770) DFSan's sscanf is incorrect (https://github.com/llvm/llvm-project/issues/94769), which results in erroneous matches when scraping RSS from /proc/maps. This patch works around the issue by using strstr as a secondary check. It also adds a loose validity check for the initial RSS measurement, to guard against regressions in get_rss_kb(). 
Fixes https://github.com/llvm/llvm-project/issues/91287
---
 compiler-rt/test/dfsan/release_shadow_space.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/compiler-rt/test/dfsan/release_shadow_space.c b/compiler-rt/test/dfsan/release_shadow_space.c
index 60dec98..0f0e1a9 100644
--- a/compiler-rt/test/dfsan/release_shadow_space.c
+++ b/compiler-rt/test/dfsan/release_shadow_space.c
@@ -3,9 +3,6 @@
 // DFSAN_OPTIONS=no_huge_pages_for_shadow=false RUN: %clang_dfsan %s -DORIGIN_TRACKING -mllvm -dfsan-track-origins=1 -o %t && %run %t
 // DFSAN_OPTIONS=no_huge_pages_for_shadow=true RUN: %clang_dfsan %s -DORIGIN_TRACKING -mllvm -dfsan-track-origins=1 -o %t && %run %t
-// This test is flaky right now: https://github.com/llvm/llvm-project/issues/91287
-// UNSUPPORTED: target={{.*}}
-
 #include
 #include
 #include
@@ -26,7 +23,11 @@ size_t get_rss_kb() {
   char buf[256];
   while (fgets(buf, sizeof(buf), f) != NULL) {
     int64_t rss;
-    if (sscanf(buf, "Rss: %ld kB", &rss) == 1)
+    // DFSan's sscanf is broken and doesn't check for ordinary characters in
+    // the format string, hence we use strstr as a secondary check
+    // (https://github.com/llvm/llvm-project/issues/94769).
+    if ((sscanf(buf, "Rss: %ld kB", &rss) == 1) &&
+        (strstr(buf, "Rss: ") != NULL))
       ret += rss;
   }
   assert(feof(f));
@@ -73,6 +74,11 @@ int main(int argc, char **argv) {
       before, after_mmap, after_mmap_and_set_label, after_fixed_mmap,
       after_mmap_and_set_label2, after_munmap);

+  // This is orders of magnitude larger than we expect (typically < 10,000KB).
+  // It is a quick check to ensure that the RSS calculation function isn't
+  // egregiously wrong.
+  assert(before < 1000000);
+
   const size_t mmap_cost_kb = map_size >> 10;
   // Shadow space (1:1 with application memory)
   const size_t mmap_shadow_cost_kb = sizeof(dfsan_label) * mmap_cost_kb;
--
cgit v1.1

From 5d87ba1c1f584dfbd5afaf187099b43681b2206d Mon Sep 17 00:00:00 2001
From: Helena Kotas
Date: Fri, 7 Jun 2024 21:30:04 -0700
Subject: [HLSL] Use llvm::Triple::EnvironmentType instead of
 HLSLShaderAttr::ShaderType (#93847)

`HLSLShaderAttr::ShaderType` enum is a subset of
`llvm::Triple::EnvironmentType`. We can use `llvm::Triple::EnvironmentType`
directly and avoid converting one enum to another.
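A hedged, self-contained C++ sketch of the simplification (the enums below are invented stand-ins, not the actual Clang/LLVM definitions): when one enum is a contiguous subset of another, storing the wider enum directly lets a switch-based converter collapse into a range check.

// Stand-in for llvm::Triple::EnvironmentType (values invented).
enum Env { UnknownEnv, Pixel, Vertex, Compute, Amplification };

// Before: a parallel shader-only enum forced boilerplate conversion.
enum ShaderType { STPixel, STVertex, STCompute, STAmplification };

Env toEnv(ShaderType st) { // the kind of converter the patch deletes
  switch (st) {
  case STPixel:         return Pixel;
  case STVertex:        return Vertex;
  case STCompute:       return Compute;
  case STAmplification: return Amplification;
  }
  return UnknownEnv;
}

// After: use Env everywhere; validity becomes a range check, in the same
// spirit as the patch's HLSLShaderAttr::isValidShaderType().
bool isValidShaderEnv(Env e) { return e >= Pixel && e <= Amplification; }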
--- clang/include/clang/Basic/Attr.td | 27 ++------- clang/include/clang/Sema/SemaHLSL.h | 6 +- clang/lib/CodeGen/CGHLSLRuntime.cpp | 2 +- clang/lib/Sema/SemaHLSL.cpp | 109 ++++++++++++++++++++---------------- 4 files changed, 69 insertions(+), 75 deletions(-) diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 17d9a71..b70b0c8 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -4470,37 +4470,20 @@ def HLSLShader : InheritableAttr { let Subjects = SubjectList<[HLSLEntry]>; let LangOpts = [HLSL]; let Args = [ - EnumArgument<"Type", "ShaderType", /*is_string=*/true, + EnumArgument<"Type", "llvm::Triple::EnvironmentType", /*is_string=*/true, ["pixel", "vertex", "geometry", "hull", "domain", "compute", "raygeneration", "intersection", "anyhit", "closesthit", "miss", "callable", "mesh", "amplification"], ["Pixel", "Vertex", "Geometry", "Hull", "Domain", "Compute", "RayGeneration", "Intersection", "AnyHit", "ClosestHit", - "Miss", "Callable", "Mesh", "Amplification"]> + "Miss", "Callable", "Mesh", "Amplification"], + /*opt=*/0, /*fake=*/0, /*isExternalType=*/1> ]; let Documentation = [HLSLSV_ShaderTypeAttrDocs]; let AdditionalMembers = [{ - static const unsigned ShaderTypeMaxValue = (unsigned)HLSLShaderAttr::Amplification; - - static llvm::Triple::EnvironmentType getTypeAsEnvironment(HLSLShaderAttr::ShaderType ShaderType) { - switch (ShaderType) { - case HLSLShaderAttr::Pixel: return llvm::Triple::Pixel; - case HLSLShaderAttr::Vertex: return llvm::Triple::Vertex; - case HLSLShaderAttr::Geometry: return llvm::Triple::Geometry; - case HLSLShaderAttr::Hull: return llvm::Triple::Hull; - case HLSLShaderAttr::Domain: return llvm::Triple::Domain; - case HLSLShaderAttr::Compute: return llvm::Triple::Compute; - case HLSLShaderAttr::RayGeneration: return llvm::Triple::RayGeneration; - case HLSLShaderAttr::Intersection: return llvm::Triple::Intersection; - case HLSLShaderAttr::AnyHit: return llvm::Triple::AnyHit; - case HLSLShaderAttr::ClosestHit: return llvm::Triple::ClosestHit; - case HLSLShaderAttr::Miss: return llvm::Triple::Miss; - case HLSLShaderAttr::Callable: return llvm::Triple::Callable; - case HLSLShaderAttr::Mesh: return llvm::Triple::Mesh; - case HLSLShaderAttr::Amplification: return llvm::Triple::Amplification; - } - llvm_unreachable("unknown enumeration value"); + static bool isValidShaderType(llvm::Triple::EnvironmentType ShaderType) { + return ShaderType >= llvm::Triple::Pixel && ShaderType <= llvm::Triple::Amplification; } }]; } diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h index e145f5e..0e41a72 100644 --- a/clang/include/clang/Sema/SemaHLSL.h +++ b/clang/include/clang/Sema/SemaHLSL.h @@ -39,7 +39,7 @@ public: const AttributeCommonInfo &AL, int X, int Y, int Z); HLSLShaderAttr *mergeShaderAttr(Decl *D, const AttributeCommonInfo &AL, - HLSLShaderAttr::ShaderType ShaderType); + llvm::Triple::EnvironmentType ShaderType); HLSLParamModifierAttr * mergeParamModifierAttr(Decl *D, const AttributeCommonInfo &AL, HLSLParamModifierAttr::Spelling Spelling); @@ -48,8 +48,8 @@ public: void CheckSemanticAnnotation(FunctionDecl *EntryPoint, const Decl *Param, const HLSLAnnotationAttr *AnnotationAttr); void DiagnoseAttrStageMismatch( - const Attr *A, HLSLShaderAttr::ShaderType Stage, - std::initializer_list AllowedStages); + const Attr *A, llvm::Triple::EnvironmentType Stage, + std::initializer_list AllowedStages); void DiagnoseAvailabilityViolations(TranslationUnitDecl *TU); void 
handleNumThreadsAttr(Decl *D, const ParsedAttr &AL); diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 5e6a3dd..55ba21a 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -313,7 +313,7 @@ void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes( assert(ShaderAttr && "All entry functions must have a HLSLShaderAttr"); const StringRef ShaderAttrKindStr = "hlsl.shader"; Fn->addFnAttr(ShaderAttrKindStr, - ShaderAttr->ConvertShaderTypeToStr(ShaderAttr->getType())); + llvm::Triple::getEnvironmentTypeName(ShaderAttr->getType())); if (HLSLNumThreadsAttr *NumThreadsAttr = FD->getAttr()) { const StringRef NumThreadsKindStr = "hlsl.numthreads"; std::string NumThreadsStr = diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 0a2face..144cdcc 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -146,7 +146,7 @@ HLSLNumThreadsAttr *SemaHLSL::mergeNumThreadsAttr(Decl *D, HLSLShaderAttr * SemaHLSL::mergeShaderAttr(Decl *D, const AttributeCommonInfo &AL, - HLSLShaderAttr::ShaderType ShaderType) { + llvm::Triple::EnvironmentType ShaderType) { if (HLSLShaderAttr *NT = D->getAttr()) { if (NT->getType() != ShaderType) { Diag(NT->getLocation(), diag::err_hlsl_attribute_param_mismatch) << AL; @@ -184,13 +184,12 @@ void SemaHLSL::ActOnTopLevelFunction(FunctionDecl *FD) { if (FD->getName() != TargetInfo.getTargetOpts().HLSLEntry) return; - StringRef Env = TargetInfo.getTriple().getEnvironmentName(); - HLSLShaderAttr::ShaderType ShaderType; - if (HLSLShaderAttr::ConvertStrToShaderType(Env, ShaderType)) { + llvm::Triple::EnvironmentType Env = TargetInfo.getTriple().getEnvironment(); + if (HLSLShaderAttr::isValidShaderType(Env) && Env != llvm::Triple::Library) { if (const auto *Shader = FD->getAttr()) { // The entry point is already annotated - check that it matches the // triple. - if (Shader->getType() != ShaderType) { + if (Shader->getType() != Env) { Diag(Shader->getLocation(), diag::err_hlsl_entry_shader_attr_mismatch) << Shader; FD->setInvalidDecl(); @@ -198,11 +197,11 @@ void SemaHLSL::ActOnTopLevelFunction(FunctionDecl *FD) { } else { // Implicitly add the shader attribute if the entry function isn't // explicitly annotated. 
- FD->addAttr(HLSLShaderAttr::CreateImplicit(getASTContext(), ShaderType, + FD->addAttr(HLSLShaderAttr::CreateImplicit(getASTContext(), Env, FD->getBeginLoc())); } } else { - switch (TargetInfo.getTriple().getEnvironment()) { + switch (Env) { case llvm::Triple::UnknownEnvironment: case llvm::Triple::Library: break; @@ -215,38 +214,40 @@ void SemaHLSL::ActOnTopLevelFunction(FunctionDecl *FD) { void SemaHLSL::CheckEntryPoint(FunctionDecl *FD) { const auto *ShaderAttr = FD->getAttr(); assert(ShaderAttr && "Entry point has no shader attribute"); - HLSLShaderAttr::ShaderType ST = ShaderAttr->getType(); + llvm::Triple::EnvironmentType ST = ShaderAttr->getType(); switch (ST) { - case HLSLShaderAttr::Pixel: - case HLSLShaderAttr::Vertex: - case HLSLShaderAttr::Geometry: - case HLSLShaderAttr::Hull: - case HLSLShaderAttr::Domain: - case HLSLShaderAttr::RayGeneration: - case HLSLShaderAttr::Intersection: - case HLSLShaderAttr::AnyHit: - case HLSLShaderAttr::ClosestHit: - case HLSLShaderAttr::Miss: - case HLSLShaderAttr::Callable: + case llvm::Triple::Pixel: + case llvm::Triple::Vertex: + case llvm::Triple::Geometry: + case llvm::Triple::Hull: + case llvm::Triple::Domain: + case llvm::Triple::RayGeneration: + case llvm::Triple::Intersection: + case llvm::Triple::AnyHit: + case llvm::Triple::ClosestHit: + case llvm::Triple::Miss: + case llvm::Triple::Callable: if (const auto *NT = FD->getAttr()) { DiagnoseAttrStageMismatch(NT, ST, - {HLSLShaderAttr::Compute, - HLSLShaderAttr::Amplification, - HLSLShaderAttr::Mesh}); + {llvm::Triple::Compute, + llvm::Triple::Amplification, + llvm::Triple::Mesh}); FD->setInvalidDecl(); } break; - case HLSLShaderAttr::Compute: - case HLSLShaderAttr::Amplification: - case HLSLShaderAttr::Mesh: + case llvm::Triple::Compute: + case llvm::Triple::Amplification: + case llvm::Triple::Mesh: if (!FD->hasAttr()) { Diag(FD->getLocation(), diag::err_hlsl_missing_numthreads) - << HLSLShaderAttr::ConvertShaderTypeToStr(ST); + << llvm::Triple::getEnvironmentTypeName(ST); FD->setInvalidDecl(); } break; + default: + llvm_unreachable("Unhandled environment in triple"); } for (ParmVarDecl *Param : FD->parameters()) { @@ -268,14 +269,14 @@ void SemaHLSL::CheckSemanticAnnotation( const HLSLAnnotationAttr *AnnotationAttr) { auto *ShaderAttr = EntryPoint->getAttr(); assert(ShaderAttr && "Entry point has no shader attribute"); - HLSLShaderAttr::ShaderType ST = ShaderAttr->getType(); + llvm::Triple::EnvironmentType ST = ShaderAttr->getType(); switch (AnnotationAttr->getKind()) { case attr::HLSLSV_DispatchThreadID: case attr::HLSLSV_GroupIndex: - if (ST == HLSLShaderAttr::Compute) + if (ST == llvm::Triple::Compute) return; - DiagnoseAttrStageMismatch(AnnotationAttr, ST, {HLSLShaderAttr::Compute}); + DiagnoseAttrStageMismatch(AnnotationAttr, ST, {llvm::Triple::Compute}); break; default: llvm_unreachable("Unknown HLSLAnnotationAttr"); @@ -283,16 +284,16 @@ void SemaHLSL::CheckSemanticAnnotation( } void SemaHLSL::DiagnoseAttrStageMismatch( - const Attr *A, HLSLShaderAttr::ShaderType Stage, - std::initializer_list AllowedStages) { + const Attr *A, llvm::Triple::EnvironmentType Stage, + std::initializer_list AllowedStages) { SmallVector StageStrings; llvm::transform(AllowedStages, std::back_inserter(StageStrings), - [](HLSLShaderAttr::ShaderType ST) { + [](llvm::Triple::EnvironmentType ST) { return StringRef( - HLSLShaderAttr::ConvertShaderTypeToStr(ST)); + HLSLShaderAttr::ConvertEnvironmentTypeToStr(ST)); }); Diag(A->getLoc(), diag::err_hlsl_attr_unsupported_in_stage) - << A << 
HLSLShaderAttr::ConvertShaderTypeToStr(Stage) + << A << llvm::Triple::getEnvironmentTypeName(Stage) << (AllowedStages.size() != 1) << join(StageStrings, ", "); } @@ -430,8 +431,8 @@ void SemaHLSL::handleShaderAttr(Decl *D, const ParsedAttr &AL) { if (!SemaRef.checkStringLiteralArgumentAttr(AL, 0, Str, &ArgLoc)) return; - HLSLShaderAttr::ShaderType ShaderType; - if (!HLSLShaderAttr::ConvertStrToShaderType(Str, ShaderType)) { + llvm::Triple::EnvironmentType ShaderType; + if (!HLSLShaderAttr::ConvertStrToEnvironmentType(Str, ShaderType)) { Diag(AL.getLoc(), diag::warn_attribute_type_not_supported) << AL << Str << ArgLoc; return; @@ -549,16 +550,22 @@ class DiagnoseHLSLAvailability // // Maps FunctionDecl to an unsigned number that represents the set of shader // environments the function has been scanned for. - // Since HLSLShaderAttr::ShaderType enum is generated from Attr.td and is - // defined without any assigned values, it is guaranteed to be numbered - // sequentially from 0 up and we can use it to 'index' individual bits - // in the set. + // The llvm::Triple::EnvironmentType enum values for shader stages guaranteed + // to be numbered from llvm::Triple::Pixel to llvm::Triple::Amplification + // (verified by static_asserts in Triple.cpp), we can use it to index + // individual bits in the set, as long as we shift the values to start with 0 + // by subtracting the value of llvm::Triple::Pixel first. + // // The N'th bit in the set will be set if the function has been scanned - // in shader environment whose ShaderType integer value equals N. + // in shader environment whose llvm::Triple::EnvironmentType integer value + // equals (llvm::Triple::Pixel + N). + // // For example, if a function has been scanned in compute and pixel stage - // environment, the value will be 0x21 (100001 binary) because - // (int)HLSLShaderAttr::ShaderType::Pixel == 1 and - // (int)HLSLShaderAttr::ShaderType::Compute == 5. + // environment, the value will be 0x21 (100001 binary) because: + // + // (int)(llvm::Triple::Pixel - llvm::Triple::Pixel) == 0 + // (int)(llvm::Triple::Compute - llvm::Triple::Pixel) == 5 + // // A FunctionDecl is mapped to 0 (or not included in the map) if it has not // been scanned in any environment. llvm::DenseMap ScannedDecls; @@ -574,12 +581,16 @@ class DiagnoseHLSLAvailability bool ReportOnlyShaderStageIssues; // Helper methods for dealing with current stage context / environment - void SetShaderStageContext(HLSLShaderAttr::ShaderType ShaderType) { + void SetShaderStageContext(llvm::Triple::EnvironmentType ShaderType) { static_assert(sizeof(unsigned) >= 4); - assert((unsigned)ShaderType < 31); // 31 is reserved for "unknown" - - CurrentShaderEnvironment = HLSLShaderAttr::getTypeAsEnvironment(ShaderType); - CurrentShaderStageBit = (1 << ShaderType); + assert(HLSLShaderAttr::isValidShaderType(ShaderType)); + assert((unsigned)(ShaderType - llvm::Triple::Pixel) < 31 && + "ShaderType is too big for this bitmap"); // 31 is reserved for + // "unknown" + + unsigned bitmapIndex = ShaderType - llvm::Triple::Pixel; + CurrentShaderEnvironment = ShaderType; + CurrentShaderStageBit = (1 << bitmapIndex); } void SetUnknownShaderStageContext() { -- cgit v1.1 From 5aabbf0602c48b67bb89fd37f95bf97c95ded488 Mon Sep 17 00:00:00 2001 From: Vladimir Vereschaka Date: Fri, 7 Jun 2024 22:05:41 -0700 Subject: [CMake] Update CMake cache file for the ARM/Aarch64 cross toolchain builds. NFC. 
(#94835)

* generate a Clang configuration file with the provided target sysroot
  (TOOLCHAIN_TARGET_SYSROOTFS)
* explicitly pass the provided target sysroot into the compiler-rt tests
  configuration.
* add the ability to configure the type of the built libraries -- shared or
  static (TOOLCHAIN_SHARED_LIBS, default OFF)

On behalf of: #94284
---
 clang/cmake/caches/CrossWinToARMLinux.cmake | 45 ++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 14 deletions(-)

diff --git a/clang/cmake/caches/CrossWinToARMLinux.cmake b/clang/cmake/caches/CrossWinToARMLinux.cmake
index 6826d01..e4d0a0c 100644
--- a/clang/cmake/caches/CrossWinToARMLinux.cmake
+++ b/clang/cmake/caches/CrossWinToARMLinux.cmake
@@ -6,21 +6,23 @@
 # on Windows platform.
 #
 # NOTE: the build requires a development ARM Linux root filesystem to use
-# proper target platform depended library and header files:
-# - create directory and put the clang configuration
-# file named .cfg into it.
-# - add the `--sysroot=` argument into
-# this configuration file.
-# - add other necessary target depended clang arguments there,
-# such as '-mcpu=cortex-a78' & etc.
+# proper target platform depended library and header files.
+#
+# The build generates a proper clang configuration file with stored
+# --sysroot argument for specified target triple. Also it is possible
+# to specify configuration path via CMake arguments, such as
+# -DCLANG_CONFIG_FILE_USER_DIR=
+# and/or
+# -DCLANG_CONFIG_FILE_SYSTEM_DIR=
 #
 # See more details here: https://clang.llvm.org/docs/UsersManual.html#configuration-files
 #
 # Configure:
 # cmake -G Ninja ^
 # -DTOOLCHAIN_TARGET_TRIPLE=aarch64-unknown-linux-gnu ^
+# -DTOOLCHAIN_TARGET_SYSROOTFS= ^
+# -DTOOLCHAIN_SHARED_LIBS=OFF ^
 # -DCMAKE_INSTALL_PREFIX=../install ^
-# -DCLANG_CONFIG_FILE_USER_DIR= ^
 # -DCMAKE_CXX_FLAGS="-D__OPTIMIZE__" ^
 # -DREMOTE_TEST_HOST="" ^
 # -DREMOTE_TEST_USER="" ^
@@ -81,6 +83,20 @@ endif()

 message(STATUS "Toolchain target triple: ${TOOLCHAIN_TARGET_TRIPLE}")

+if (DEFINED TOOLCHAIN_TARGET_SYSROOTFS)
+  message(STATUS "Toolchain target sysroot: ${TOOLCHAIN_TARGET_SYSROOTFS}")
+  # Store the --sysroot argument for the compiler-rt test flags.
+  set(sysroot_flags --sysroot='${TOOLCHAIN_TARGET_SYSROOTFS}')
+  # Generate the clang configuration file for the specified target triple
+  # and store --sysroot in this file.
+  file(WRITE "${CMAKE_BINARY_DIR}/bin/${TOOLCHAIN_TARGET_TRIPLE}.cfg" ${sysroot_flags})
+endif()
+
+# Build the shared libraries for libc++/libc++abi/libunwind.
+if (NOT DEFINED TOOLCHAIN_SHARED_LIBS)
+  set(TOOLCHAIN_SHARED_LIBS OFF)
+endif()
+
 if (NOT DEFINED LLVM_TARGETS_TO_BUILD)
   if ("${TOOLCHAIN_TARGET_TRIPLE}" MATCHES "^(armv|arm32)+")
     set(LLVM_TARGETS_TO_BUILD "ARM" CACHE STRING "")
@@ -183,20 +199,21 @@ set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_CAN_EXECUTE_TESTS
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_USE_BUILTINS_LIBRARY ON CACHE BOOL "")
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_CXX_LIBRARY libcxx CACHE STRING "")

-# Tell Clang to seach C++ headers alongside with the just-built binaries for the C++ compiler-rt tests.
-set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_TEST_COMPILER_CFLAGS "--stdlib=libc++" CACHE STRING "")
-
+# The compiler-rt tests disable the clang configuration files during the execution by setting CLANG_NO_DEFAULT_CONFIG=1
+# and drops out the --sysroot from there. Provide it explicitly via the test flags here if target sysroot has been specified.
+set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_TEST_COMPILER_CFLAGS "--stdlib=libc++ ${sysroot_flags}" CACHE STRING "")
+
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_LIBUNWIND_USE_COMPILER_RT ON CACHE BOOL "")
-set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_LIBUNWIND_ENABLE_SHARED OFF CACHE BOOL "")
+set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_LIBUNWIND_ENABLE_SHARED ${TOOLCHAIN_SHARED_LIBS} CACHE BOOL "")
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_LIBCXXABI_USE_LLVM_UNWINDER ON CACHE BOOL "")
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_LIBCXXABI_ENABLE_STATIC_UNWINDER ON CACHE BOOL "")
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_LIBCXXABI_USE_COMPILER_RT ON CACHE BOOL "")
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS OFF CACHE BOOL "")
-set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_LIBCXXABI_ENABLE_SHARED OFF CACHE BOOL "")
+set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_LIBCXXABI_ENABLE_SHARED ${TOOLCHAIN_SHARED_LIBS} CACHE BOOL "")
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_LIBCXX_USE_COMPILER_RT ON CACHE BOOL "")
-set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_LIBCXX_ENABLE_SHARED OFF CACHE BOOL "")
+set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_LIBCXX_ENABLE_SHARED ${TOOLCHAIN_SHARED_LIBS} CACHE BOOL "")
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_LIBCXX_ABI_VERSION ${LIBCXX_ABI_VERSION} CACHE STRING "")
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_LIBCXX_CXX_ABI "libcxxabi" CACHE STRING "")
 #!!! set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS ON CACHE BOOL "")
--
cgit v1.1

From 950605bdd8f34bfa9664e71224a23da769c870ec Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 7 Jun 2024 22:52:47 -0700
Subject: [RISCV] Remove many ImmType parameters from tablegen classes. NFC

These usually have a single value that is always used. We can just hardcode
it into the class body.
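The cleanup pattern translates directly to ordinary C++ (a hedged analogy with invented names, since the patch itself is TableGen): a defaulted parameter that every user passes with the same value carries no information, so folding it into the body simplifies the interface.

// Before: a knob in the signature that no caller ever turns.
unsigned decodeImmBefore(unsigned enc, unsigned width = 5) {
  return enc & ((1u << width) - 1);
}

// After: the constant lives where it is used and the interface shrinks.
unsigned decodeImmAfter(unsigned enc) {
  constexpr unsigned Width = 5; // simm5-style immediate width, now fixed
  return enc & ((1u << Width) - 1);
}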
--- llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 61 ++++++++++++------------- llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td | 5 +- 2 files changed, 31 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 1a51481..4a67b1b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -2317,11 +2317,11 @@ multiclass VPseudoVSLD1_VF { } } -multiclass VPseudoBinaryV_VI { +multiclass VPseudoBinaryV_VI { defm _VI : VPseudoBinary; } -multiclass VPseudoBinaryV_VI_RM { +multiclass VPseudoBinaryV_VI_RM { defm _VI : VPseudoBinaryRoundingMode; } @@ -2696,13 +2696,13 @@ multiclass VPseudoBinaryM_VI { !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>; } -multiclass VPseudoVGTR_VV_VX_VI { +multiclass VPseudoVGTR_VV_VX_VI { foreach m = MxList in { defvar mx = m.MX; defm "" : VPseudoBinaryV_VX, SchedBinary<"WriteVRGatherVX", "ReadVRGatherVX_data", "ReadVRGatherVX_index", mx, forceMergeOpRead=true>; - defm "" : VPseudoBinaryV_VI, + defm "" : VPseudoBinaryV_VI, SchedUnary<"WriteVRGatherVI", "ReadVRGatherVI_data", mx, forceMergeOpRead=true>; @@ -2715,8 +2715,7 @@ multiclass VPseudoVGTR_VV_VX_VI } } -multiclass VPseudoVSALU_VV_VX_VI { +multiclass VPseudoVSALU_VV_VX_VI { foreach m = MxList in { defvar mx = m.MX; defm "" : VPseudoBinaryV_VV, @@ -2725,13 +2724,13 @@ multiclass VPseudoVSALU_VV_VX_VI, SchedBinary<"WriteVSALUX", "ReadVSALUV", "ReadVSALUX", mx, forceMergeOpRead=true>; - defm "" : VPseudoBinaryV_VI, + defm "" : VPseudoBinaryV_VI, SchedUnary<"WriteVSALUI", "ReadVSALUV", mx, forceMergeOpRead=true>; } } -multiclass VPseudoVSHT_VV_VX_VI { +multiclass VPseudoVSHT_VV_VX_VI { foreach m = MxList in { defvar mx = m.MX; defm "" : VPseudoBinaryV_VV, @@ -2740,12 +2739,12 @@ multiclass VPseudoVSHT_VV_VX_VI defm "" : VPseudoBinaryV_VX, SchedBinary<"WriteVShiftX", "ReadVShiftV", "ReadVShiftX", mx, forceMergeOpRead=true>; - defm "" : VPseudoBinaryV_VI, + defm "" : VPseudoBinaryV_VI, SchedUnary<"WriteVShiftI", "ReadVShiftV", mx, forceMergeOpRead=true>; } } -multiclass VPseudoVSSHT_VV_VX_VI_RM { +multiclass VPseudoVSSHT_VV_VX_VI_RM { foreach m = MxList in { defvar mx = m.MX; defm "" : VPseudoBinaryV_VV_RM, @@ -2754,13 +2753,12 @@ multiclass VPseudoVSSHT_VV_VX_VI_RM, SchedBinary<"WriteVSShiftX", "ReadVSShiftV", "ReadVSShiftX", mx, forceMergeOpRead=true>; - defm "" : VPseudoBinaryV_VI_RM, + defm "" : VPseudoBinaryV_VI_RM, SchedUnary<"WriteVSShiftI", "ReadVSShiftV", mx, forceMergeOpRead=true>; } } -multiclass VPseudoVALU_VV_VX_VI { +multiclass VPseudoVALU_VV_VX_VI { foreach m = MxList in { defvar mx = m.MX; defm "" : VPseudoBinaryV_VV, @@ -2769,7 +2767,7 @@ multiclass VPseudoVALU_VV_VX_VI, SchedBinary<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX", mx, forceMergeOpRead=true>; - defm "" : VPseudoBinaryV_VI, + defm "" : VPseudoBinaryV_VI, SchedUnary<"WriteVIALUI", "ReadVIALUV", mx, forceMergeOpRead=true>; } } @@ -2962,13 +2960,13 @@ multiclass VPseudoVALU_VF_RM { } } -multiclass VPseudoVALU_VX_VI { +multiclass VPseudoVALU_VX_VI { foreach m = MxList in { defvar mx = m.MX; defm "" : VPseudoBinaryV_VX, SchedBinary<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX", mx, forceMergeOpRead=true>; - defm "" : VPseudoBinaryV_VI, + defm "" : VPseudoBinaryV_VI, SchedUnary<"WriteVIALUI", "ReadVIALUV", mx, forceMergeOpRead=true>; } } @@ -3333,8 +3331,8 @@ multiclass VPseudoVSLDV_VX { defm _VX : VPseudoVSLDVWithPolicy; } -multiclass VPseudoVSLDV_VI { - defm _VI : 
VPseudoVSLDVWithPolicy; +multiclass VPseudoVSLDV_VI { + defm _VI : VPseudoVSLDVWithPolicy; } multiclass VPseudoVMAC_VV_VX_AAXA { @@ -3366,15 +3364,14 @@ multiclass VPseudoVMAC_VV_VF_AAXA_RM { } } -multiclass VPseudoVSLD_VX_VI { +multiclass VPseudoVSLD_VX_VI { defvar WriteSlideX = !if(slidesUp, "WriteVSlideUpX", "WriteVSlideDownX"); foreach m = MxList in { defvar mx = m.MX; defm "" : VPseudoVSLDV_VX, SchedTernary; - defm "" : VPseudoVSLDV_VI, + defm "" : VPseudoVSLDV_VI, SchedBinary<"WriteVSlideI", "ReadVISlideV", "ReadVISlideV", mx>; } } @@ -5441,7 +5438,7 @@ multiclass VPatBinaryV_VV_VX_VI; multiclass VPatBinaryV_VV_VX_VI_RM vtilist, Operand ImmType = simm5> + list vtilist, Operand ImmType> : VPatBinaryV_VV_RM, VPatBinaryV_VX_RM, VPatBinaryV_VI_RM; @@ -5777,7 +5774,7 @@ multiclass VPatTernaryV_VV_VX_AAXA_RM; multiclass VPatTernaryV_VX_VI vtilist, Operand Imm_type = simm5> + list vtilist, Operand Imm_type> : VPatTernaryV_VX, VPatTernaryV_VI; @@ -5809,7 +5806,7 @@ multiclass VPatBinaryM_VX_VI; multiclass VPatBinaryV_VV_VX_VI_INT vtilist, Operand ImmType = simm5> + list vtilist, Operand ImmType> : VPatBinaryV_VV_INT, VPatBinaryV_VX_INT, VPatBinaryV_VI; @@ -6329,9 +6326,9 @@ defm PseudoVXOR : VPseudoVALU_VV_VX_VI; //===----------------------------------------------------------------------===// // 11.6. Vector Single-Width Bit Shift Instructions //===----------------------------------------------------------------------===// -defm PseudoVSLL : VPseudoVSHT_VV_VX_VI; -defm PseudoVSRL : VPseudoVSHT_VV_VX_VI; -defm PseudoVSRA : VPseudoVSHT_VV_VX_VI; +defm PseudoVSLL : VPseudoVSHT_VV_VX_VI; +defm PseudoVSRL : VPseudoVSHT_VV_VX_VI; +defm PseudoVSRA : VPseudoVSHT_VV_VX_VI; //===----------------------------------------------------------------------===// // 11.7. Vector Narrowing Integer Right Shift Instructions @@ -6440,8 +6437,8 @@ let Defs = [VXSAT] in { //===----------------------------------------------------------------------===// // 12.4. Vector Single-Width Scaling Shift Instructions //===----------------------------------------------------------------------===// -defm PseudoVSSRL : VPseudoVSSHT_VV_VX_VI_RM; -defm PseudoVSSRA : VPseudoVSSHT_VV_VX_VI_RM; +defm PseudoVSSRL : VPseudoVSSHT_VV_VX_VI_RM; +defm PseudoVSSRA : VPseudoVSSHT_VV_VX_VI_RM; //===----------------------------------------------------------------------===// // 12.5. Vector Narrowing Fixed-Point Clip Instructions @@ -6826,8 +6823,8 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { // 16.3. Vector Slide Instructions //===----------------------------------------------------------------------===// let Predicates = [HasVInstructions] in { - defm PseudoVSLIDEUP : VPseudoVSLD_VX_VI; - defm PseudoVSLIDEDOWN : VPseudoVSLD_VX_VI; + defm PseudoVSLIDEUP : VPseudoVSLD_VX_VI; + defm PseudoVSLIDEDOWN : VPseudoVSLD_VX_VI; defm PseudoVSLIDE1UP : VPseudoVSLD1_VX<"@earlyclobber $rd">; defm PseudoVSLIDE1DOWN : VPseudoVSLD1_VX; } // Predicates = [HasVInstructions] @@ -6841,7 +6838,7 @@ let Predicates = [HasVInstructionsAnyF] in { // 16.4. 
Vector Register Gather Instructions //===----------------------------------------------------------------------===// let Predicates = [HasVInstructions] in { -defm PseudoVRGATHER : VPseudoVGTR_VV_VX_VI; +defm PseudoVRGATHER : VPseudoVGTR_VV_VX_VI<"@earlyclobber $rd">; defm PseudoVRGATHEREI16 : VPseudoVGTR_EI16_VV; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index 98b5aee..4bae0d0e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ -1025,11 +1025,10 @@ multiclass VPatBinaryV_VV_VX_VROL; multiclass VPatBinaryV_VV_VX_VI_VROR vtilist, - Operand ImmType = uimm6> + list vtilist> : VPatBinaryV_VV, VPatBinaryV_VX_VROTATE, - VPatBinaryV_VI; + VPatBinaryV_VI; multiclass VPatBinaryW_VV_VX_VI_VWSLL vtilist> -- cgit v1.1 From 2fa14fca4fa1ac626fa6fda9c322680bec4307d1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 7 Jun 2024 23:09:42 -0700 Subject: [RISCV] Remove unused defaults for sew paramters in tablegen. NFC Also remove some unused Constraint paramters that appeared before the sew parameter. --- llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 55 ++++++++++++------------- llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td | 8 ++-- 2 files changed, 30 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 4a67b1b..1a5fc1c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -2194,7 +2194,7 @@ multiclass VPseudoBinaryEmul { + int sew> { let VLMul = lmul.value, SEW=sew in { defvar suffix = !if(sew, "_" # lmul.MX # "_E" # sew, "_" # lmul.MX); def suffix # "_" # emul.MX : VPseudoBinaryNoMaskPolicy { - defm _VV : VPseudoBinary; +multiclass VPseudoBinaryFV_VV { + defm _VV : VPseudoBinary; } -multiclass VPseudoBinaryFV_VV_RM { +multiclass VPseudoBinaryFV_VV_RM { defm _VV : VPseudoBinaryRoundingMode; + "", sew, UsesVXRM=0>; } multiclass VPseudoVGTR_EI16_VV { @@ -2295,14 +2294,14 @@ multiclass VPseudoVSLD1_VX { } } -multiclass VPseudoBinaryV_VF { +multiclass VPseudoBinaryV_VF { defm "_V" # f.FX : VPseudoBinary; + f.fprclass, m, "", sew>; } -multiclass VPseudoBinaryV_VF_RM { +multiclass VPseudoBinaryV_VF_RM { defm "_V" # f.FX : VPseudoBinaryRoundingMode; } @@ -2348,7 +2347,7 @@ multiclass VPseudoBinaryW_VV { Commutable=Commutable>; } -multiclass VPseudoBinaryW_VV_RM { +multiclass VPseudoBinaryW_VV_RM { defm _VV : VPseudoBinaryRoundingMode; @@ -2364,7 +2363,7 @@ multiclass VPseudoBinaryW_VI { "@earlyclobber $rd", TargetConstraintType=3>; } -multiclass VPseudoBinaryW_VF_RM { +multiclass VPseudoBinaryW_VF_RM { defm "_V" # f.FX : VPseudoBinaryRoundingMode { "@earlyclobber $rd", TargetConstraintType=3>; } -multiclass VPseudoBinaryW_WV_RM { +multiclass VPseudoBinaryW_WV_RM { defm _WV : VPseudoBinaryRoundingMode; @@ -2392,7 +2391,7 @@ multiclass VPseudoBinaryW_WX { defm "_WX" : VPseudoBinary; } -multiclass VPseudoBinaryW_WF_RM { +multiclass VPseudoBinaryW_WF_RM { defm "_W" # f.FX : VPseudoBinaryRoundingMode.val in - defm "" : VPseudoBinaryFV_VV_RM, + defm "" : VPseudoBinaryFV_VV_RM, SchedBinary<"WriteVFMulV", "ReadVFMulV", "ReadVFMulV", m.MX, e, forceMergeOpRead=true>; } foreach f = FPList in { foreach m = f.MxList in { - defm "" : VPseudoBinaryV_VF_RM, + defm "" : VPseudoBinaryV_VF_RM, SchedBinary<"WriteVFMulF", "ReadVFMulV", "ReadVFMulF", m.MX, f.SEW, 
forceMergeOpRead=true>; } @@ -2863,7 +2862,7 @@ multiclass VPseudoVFDIV_VV_VF_RM { defvar mx = m.MX; defvar sews = SchedSEWSet.val; foreach e = sews in { - defm "" : VPseudoBinaryFV_VV_RM, + defm "" : VPseudoBinaryFV_VV_RM, SchedBinary<"WriteVFDivV", "ReadVFDivV", "ReadVFDivV", mx, e, forceMergeOpRead=true>; } @@ -2871,7 +2870,7 @@ multiclass VPseudoVFDIV_VV_VF_RM { foreach f = FPList in { foreach m = f.MxList in { - defm "" : VPseudoBinaryV_VF_RM, + defm "" : VPseudoBinaryV_VF_RM, SchedBinary<"WriteVFDivF", "ReadVFDivV", "ReadVFDivF", m.MX, f.SEW, forceMergeOpRead=true>; } @@ -2881,7 +2880,7 @@ multiclass VPseudoVFDIV_VV_VF_RM { multiclass VPseudoVFRDIV_VF_RM { foreach f = FPList in { foreach m = f.MxList in { - defm "" : VPseudoBinaryV_VF_RM, + defm "" : VPseudoBinaryV_VF_RM, SchedBinary<"WriteVFDivF", "ReadVFDivV", "ReadVFDivF", m.MX, f.SEW, forceMergeOpRead=true>; } @@ -2936,14 +2935,14 @@ multiclass VPseudoVMAX_VV_VF { multiclass VPseudoVALU_VV_VF_RM { foreach m = MxListF in { foreach e = SchedSEWSet.val in - defm "" : VPseudoBinaryFV_VV_RM, + defm "" : VPseudoBinaryFV_VV_RM, SchedBinary<"WriteVFALUV", "ReadVFALUV", "ReadVFALUV", m.MX, e, forceMergeOpRead=true>; } foreach f = FPList in { foreach m = f.MxList in { - defm "" : VPseudoBinaryV_VF_RM, + defm "" : VPseudoBinaryV_VF_RM, SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX, f.SEW, forceMergeOpRead=true>; } @@ -2953,7 +2952,7 @@ multiclass VPseudoVALU_VV_VF_RM { multiclass VPseudoVALU_VF_RM { foreach f = FPList in { foreach m = f.MxList in { - defm "" : VPseudoBinaryV_VF_RM, + defm "" : VPseudoBinaryV_VF_RM, SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX, f.SEW, forceMergeOpRead=true>; } @@ -3246,8 +3245,8 @@ multiclass VPseudoTernaryWithPolicyRoundingMode { let VLMul = MInfo.value in { @@ -3271,7 +3270,7 @@ multiclass VPseudoTernaryV_VV_AAXA { Constraint, Commutable=1>; } -multiclass VPseudoTernaryV_VV_AAXA_RM { +multiclass VPseudoTernaryV_VV_AAXA_RM { defm _VV : VPseudoTernaryWithPolicyRoundingMode; } @@ -3282,7 +3281,7 @@ multiclass VPseudoTernaryV_VX_AAXA { } multiclass VPseudoTernaryV_VF_AAXA_RM { + string Constraint, int sew> { defm "_V" # f.FX : VPseudoTernaryWithPolicyRoundingMode; @@ -3294,7 +3293,7 @@ multiclass VPseudoTernaryW_VV { constraint, Commutable=Commutable, TargetConstraintType=3>; } -multiclass VPseudoTernaryW_VV_RM { +multiclass VPseudoTernaryW_VV_RM { defvar constraint = "@earlyclobber $rd"; defm _VV : VPseudoTernaryWithPolicyRoundingMode { constraint, /*Commutable*/ 0, TargetConstraintType=3>; } -multiclass VPseudoTernaryW_VF_RM { +multiclass VPseudoTernaryW_VF_RM { defvar constraint = "@earlyclobber $rd"; defm "_V" # f.FX : VPseudoTernaryWithPolicyRoundingMode { - let VLMul = MInfo.value, SEW=sew in { - defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); - def suffix : VPseudoBinaryNoMaskPolicy { + let VLMul = MInfo.value in { + def "_" # MInfo.MX : VPseudoBinaryNoMaskPolicy; } } -- cgit v1.1 From d3fc5cf24a93003ba963fc406aa1901a292d55f4 Mon Sep 17 00:00:00 2001 From: Shivam Gupta Date: Sat, 8 Jun 2024 12:02:01 +0530 Subject: [lldb] Remove redundant c_str() calls in stream output (NFC) (#94839) Passing the result of c_str() to a stream is slow and redundant. This change removes unnecessary c_str() calls and uses the string object directly. Caught by cppcheck - lldb/tools/debugserver/source/JSON.cpp:398:19: performance: Passing the result of c_str() to a stream is slow and redundant. 
[stlcstrStream] lldb/tools/debugserver/source/JSON.cpp:408:64: performance: Passing the result of c_str() to a stream is slow and redundant. [stlcstrStream] lldb/tools/debugserver/source/JSON.cpp:420:54: performance: Passing the result of c_str() to a stream is slow and redundant. [stlcstrStream] lldb/tools/debugserver/source/JSON.cpp:46:13: performance: Passing the result of c_str() to a stream is slow and redundant. [stlcstrStream] Fix #91212 --- lldb/tools/debugserver/source/JSON.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lldb/tools/debugserver/source/JSON.cpp b/lldb/tools/debugserver/source/JSON.cpp index 315c52a..5453d85 100644 --- a/lldb/tools/debugserver/source/JSON.cpp +++ b/lldb/tools/debugserver/source/JSON.cpp @@ -43,7 +43,7 @@ JSONString::JSONString(const std::string &s) : JSONValue(JSONValue::Kind::String), m_data(s) {} void JSONString::Write(std::ostream &s) { - s << "\"" << json_string_quote_metachars(m_data).c_str() << "\""; + s << "\"" << json_string_quote_metachars(m_data) << "\""; } uint64_t JSONNumber::GetAsUnsigned() const { @@ -395,7 +395,7 @@ JSONParser::Token JSONParser::GetToken(std::string &value) { } else { error << "error: got exponent character but no exponent digits at " "offset in float value \"" - << value.c_str() << "\""; + << value << "\""; value = error.str(); return Token::Status; } @@ -405,8 +405,7 @@ JSONParser::Token JSONParser::GetToken(std::string &value) { if (got_frac_digits) { return Token::Float; } else { - error << "error: no digits after decimal point \"" << value.c_str() - << "\""; + error << "error: no digits after decimal point \"" << value << "\""; value = error.str(); return Token::Status; } @@ -417,7 +416,7 @@ JSONParser::Token JSONParser::GetToken(std::string &value) { // We need at least some integer digits to make an integer return Token::Integer; } else { - error << "error: no digits negate sign \"" << value.c_str() << "\""; + error << "error: no digits negate sign \"" << value << "\""; value = error.str(); return Token::Status; } -- cgit v1.1 From 2e1788f8e265b97b4f19cfdeb515921dc9e317f6 Mon Sep 17 00:00:00 2001 From: Daniil Kovalev Date: Sat, 8 Jun 2024 09:33:11 +0300 Subject: Revert "[lld][AArch64][ELF][PAC] Support `.relr.auth.dyn` section" (#94843) Reverts llvm/llvm-project#87635 On some corner cases, lld generated an object file with an empty REL section with `sh_info` set to 0. This file triggers an lld error when used as its input. See https://github.com/llvm/llvm-project/pull/87635#issuecomment-2155318065 for details. --- lld/ELF/Arch/AArch64.cpp | 13 --- lld/ELF/Relocations.cpp | 10 +-- lld/ELF/SyntheticSections.cpp | 24 ++---- lld/ELF/SyntheticSections.h | 15 ++-- lld/ELF/Writer.cpp | 35 -------- lld/test/ELF/aarch64-reloc-pauth.s | 166 ++++++++----------------------------- 6 files changed, 47 insertions(+), 216 deletions(-) diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index cf5c238..47e6ea1 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -429,19 +429,6 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, case R_AARCH64_PREL64: write64(loc, val); break; - case R_AARCH64_AUTH_ABS64: - // If val is wider than 32 bits, the relocation must have been moved from - // .relr.auth.dyn to .rela.dyn, and the addend write is not needed. - // - // If val fits in 32 bits, we have two potential scenarios: - // * True RELR: Write the 32-bit `val`. 
- // * RELA: Even if the value now fits in 32 bits, it might have been - // converted from RELR during an iteration in - // finalizeAddressDependentContent(). Writing the value is harmless - // because dynamic linking ignores it. - if (isInt<32>(val)) - write32(loc, val); - break; case R_AARCH64_ADD_ABS_LO12_NC: or32AArch64Imm(loc, val); break; diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 2c02c2e..04db413 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -898,9 +898,9 @@ static void addRelativeReloc(InputSectionBase &isec, uint64_t offsetInSec, isec.addReloc({expr, type, offsetInSec, addend, &sym}); if (shard) part.relrDyn->relocsVec[parallel::getThreadIndex()].push_back( - {&isec, isec.relocs().size() - 1}); + {&isec, offsetInSec}); else - part.relrDyn->relocs.push_back({&isec, isec.relocs().size() - 1}); + part.relrDyn->relocs.push_back({&isec, offsetInSec}); return; } part.relaDyn->addRelativeReloc(target->relativeRel, isec, offsetInSec, @@ -1154,12 +1154,6 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, // relative relocation. Use a symbolic relocation instead. if (sym.isPreemptible) { part.relaDyn->addSymbolReloc(type, *sec, offset, sym, addend, type); - } else if (part.relrAuthDyn && sec->addralign >= 2 && offset % 2 == 0) { - // When symbol values are determined in - // finalizeAddressDependentContent, some .relr.auth.dyn relocations - // may be moved to .rela.dyn. - sec->addReloc({expr, type, offset, addend, &sym}); - part.relrAuthDyn->relocs.push_back({sec, sec->relocs().size() - 1}); } else { part.relaDyn->addReloc({R_AARCH64_AUTH_RELATIVE, sec, offset, DynamicReloc::AddendOnlyWithTargetVA, sym, diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index ad28028..cc423d15 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1420,12 +1420,6 @@ DynamicSection::computeContents() { addInt(config->useAndroidRelrTags ? DT_ANDROID_RELRENT : DT_RELRENT, sizeof(Elf_Relr)); } - if (part.relrAuthDyn && part.relrAuthDyn->getParent() && - !part.relrAuthDyn->relocs.empty()) { - addInSec(DT_AARCH64_AUTH_RELR, *part.relrAuthDyn); - addInt(DT_AARCH64_AUTH_RELRSZ, part.relrAuthDyn->getParent()->size); - addInt(DT_AARCH64_AUTH_RELRENT, sizeof(Elf_Relr)); - } if (isMain && in.relaPlt->isNeeded()) { addInSec(DT_JMPREL, *in.relaPlt); entries.emplace_back(DT_PLTRELSZ, addPltRelSz()); @@ -1737,13 +1731,10 @@ template void RelocationSection::writeTo(uint8_t *buf) { } } -RelrBaseSection::RelrBaseSection(unsigned concurrency, bool isAArch64Auth) - : SyntheticSection( - SHF_ALLOC, - isAArch64Auth - ? SHT_AARCH64_AUTH_RELR - : (config->useAndroidRelrTags ? SHT_ANDROID_RELR : SHT_RELR), - config->wordsize, isAArch64Auth ? ".relr.auth.dyn" : ".relr.dyn"), +RelrBaseSection::RelrBaseSection(unsigned concurrency) + : SyntheticSection(SHF_ALLOC, + config->useAndroidRelrTags ? 
SHT_ANDROID_RELR : SHT_RELR, + config->wordsize, ".relr.dyn"), relocsVec(concurrency) {} void RelrBaseSection::mergeRels() { @@ -2011,8 +2002,8 @@ bool AndroidPackedRelocationSection::updateAllocSize() { } template -RelrSection::RelrSection(unsigned concurrency, bool isAArch64Auth) - : RelrBaseSection(concurrency, isAArch64Auth) { +RelrSection::RelrSection(unsigned concurrency) + : RelrBaseSection(concurrency) { this->entsize = config->wordsize; } @@ -4783,9 +4774,6 @@ template void elf::createSyntheticSections() { if (config->relrPackDynRelocs) { part.relrDyn = std::make_unique>(threadCount); add(*part.relrDyn); - part.relrAuthDyn = std::make_unique>( - threadCount, /*isAArch64Auth=*/true); - add(*part.relrAuthDyn); } if (!config->relocatable) { diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index eaa09ea..34949025 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -548,9 +548,7 @@ public: static bool classof(const SectionBase *d) { return SyntheticSection::classof(d) && (d->type == llvm::ELF::SHT_RELA || d->type == llvm::ELF::SHT_REL || - d->type == llvm::ELF::SHT_RELR || - (d->type == llvm::ELF::SHT_AARCH64_AUTH_RELR && - config->emachine == llvm::ELF::EM_AARCH64)); + d->type == llvm::ELF::SHT_RELR); } int32_t dynamicTag, sizeDynamicTag; SmallVector relocs; @@ -598,17 +596,15 @@ private: }; struct RelativeReloc { - uint64_t getOffset() const { - return inputSec->getVA(inputSec->relocs()[relocIdx].offset); - } + uint64_t getOffset() const { return inputSec->getVA(offsetInSec); } const InputSectionBase *inputSec; - size_t relocIdx; + uint64_t offsetInSec; }; class RelrBaseSection : public SyntheticSection { public: - RelrBaseSection(unsigned concurrency, bool isAArch64Auth = false); + RelrBaseSection(unsigned concurrency); void mergeRels(); bool isNeeded() const override { return !relocs.empty() || @@ -626,7 +622,7 @@ template class RelrSection final : public RelrBaseSection { using Elf_Relr = typename ELFT::Relr; public: - RelrSection(unsigned concurrency, bool isAArch64Auth = false); + RelrSection(unsigned concurrency); bool updateAllocSize() override; size_t getSize() const override { return relrRelocs.size() * this->entsize; } @@ -1464,7 +1460,6 @@ struct Partition { std::unique_ptr packageMetadataNote; std::unique_ptr relaDyn; std::unique_ptr relrDyn; - std::unique_ptr relrAuthDyn; std::unique_ptr verDef; std::unique_ptr verNeed; std::unique_ptr verSym; diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 0aceb94..640cb2a 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1458,32 +1458,9 @@ template void Writer::finalizeAddressDependentContent() { in.mipsGot->updateAllocSize(); for (Partition &part : partitions) { - // The R_AARCH64_AUTH_RELATIVE has a smaller addend field as bits [63:32] - // encode the signing schema. We've put relocations in .relr.auth.dyn - // during RelocationScanner::processAux, but the target VA for some of - // them might be wider than 32 bits. We can only know the final VA at this - // point, so move relocations with large values from .relr.auth.dyn to - // .rela.dyn. See also AArch64::relocate. 
- if (part.relrAuthDyn) { - auto it = llvm::remove_if( - part.relrAuthDyn->relocs, [&part](const RelativeReloc &elem) { - const Relocation &reloc = elem.inputSec->relocs()[elem.relocIdx]; - if (isInt<32>(reloc.sym->getVA(reloc.addend))) - return false; - part.relaDyn->addReloc({R_AARCH64_AUTH_RELATIVE, elem.inputSec, - reloc.offset, - DynamicReloc::AddendOnlyWithTargetVA, - *reloc.sym, reloc.addend, R_ABS}); - return true; - }); - changed |= (it != part.relrAuthDyn->relocs.end()); - part.relrAuthDyn->relocs.erase(it, part.relrAuthDyn->relocs.end()); - } changed |= part.relaDyn->updateAllocSize(); if (part.relrDyn) changed |= part.relrDyn->updateAllocSize(); - if (part.relrAuthDyn) - changed |= part.relrAuthDyn->updateAllocSize(); if (part.memtagGlobalDescriptors) changed |= part.memtagGlobalDescriptors->updateAllocSize(); } @@ -1647,14 +1624,6 @@ static void removeUnusedSyntheticSections() { auto *sec = cast(s); if (sec->getParent() && sec->isNeeded()) return false; - // .relr.auth.dyn relocations may be moved to .rela.dyn in - // finalizeAddressDependentContent, making .rela.dyn no longer empty. - // Conservatively keep .rela.dyn. .relr.auth.dyn can be made empty, but - // we would fail to remove it here. - if (config->emachine == EM_AARCH64 && config->relrPackDynRelocs) - if (auto *relSec = dyn_cast(sec)) - if (relSec == mainPart->relaDyn.get()) - return false; unused.insert(sec); return true; }); @@ -1967,10 +1936,6 @@ template void Writer::finalizeSections() { part.relrDyn->mergeRels(); finalizeSynthetic(part.relrDyn.get()); } - if (part.relrAuthDyn) { - part.relrAuthDyn->mergeRels(); - finalizeSynthetic(part.relrAuthDyn.get()); - } finalizeSynthetic(part.dynSymTab.get()); finalizeSynthetic(part.gnuHashTab.get()); diff --git a/lld/test/ELF/aarch64-reloc-pauth.s b/lld/test/ELF/aarch64-reloc-pauth.s index f1ce29e..b603d8f 100644 --- a/lld/test/ELF/aarch64-reloc-pauth.s +++ b/lld/test/ELF/aarch64-reloc-pauth.s @@ -1,13 +1,11 @@ # REQUIRES: aarch64 -# RUN: rm -rf %t && split-file %s %t && cd %t +# RUN: llvm-mc -filetype=obj -triple=aarch64 %p/Inputs/shared2.s -o %t.a.o +# RUN: ld.lld -shared %t.a.o -soname=so -o %t.a.so +# RUN: llvm-mc -filetype=obj -triple=aarch64 %s -o %t.o -# RUN: llvm-mc -filetype=obj -triple=aarch64 %p/Inputs/shared2.s -o a.o -# RUN: ld.lld -shared a.o -soname=so -o a.so -# RUN: llvm-mc -filetype=obj -triple=aarch64 main.s -o main.o - -# RUN: ld.lld -pie main.o a.so -o main -# RUN: llvm-readobj -r main | FileCheck --check-prefix=UNPACKED %s +# RUN: ld.lld -pie %t.o %t.a.so -o %t +# RUN: llvm-readobj -r %t | FileCheck --check-prefix=UNPACKED %s # UNPACKED: Section ({{.+}}) .rela.dyn { # UNPACKED-NEXT: 0x30470 R_AARCH64_AUTH_RELATIVE - 0x1 @@ -23,8 +21,8 @@ # UNPACKED-NEXT: 0x304B0 R_AARCH64_AUTH_ABS64 bar2 0x0 # UNPACKED-NEXT: } -# RUN: ld.lld main.o a.so -o main.nopie -# RUN: llvm-readobj -r main.nopie | FileCheck --check-prefix=NOPIE %s +# RUN: ld.lld %t.o %t.a.so -o %t.nopie +# RUN: llvm-readobj -r %t.nopie | FileCheck --check-prefix=NOPIE %s # NOPIE: Section ({{.+}}) .rela.dyn { # NOPIE: 0x230460 R_AARCH64_AUTH_RELATIVE - 0x200001 @@ -40,95 +38,67 @@ # NOPIE-NEXT: 0x2304A0 R_AARCH64_AUTH_ABS64 bar2 0x0 # NOPIE-NEXT: } -# RUN: ld.lld -pie -z pack-relative-relocs main.o a.so -o main.pie -# RUN: llvm-readelf -S -d -r -x .test main.pie | FileCheck --check-prefixes=RELR,HEX %s - -# RELR: Section Headers: -# RELR-NEXT: Name Type Address Off Size ES Flg Lk Inf Al -# RELR: .rela.dyn RELA {{0*}}[[ADDR1:.+]] {{0*}}[[ADDR1]] 000090 18 A 1 0 8 -# RELR: .relr.auth.dyn 
AARCH64_AUTH_RELR {{0*}}[[ADDR2:.+]] {{0*}}[[ADDR2]] 000018 08 A 0 0 8 - -# RELR: Dynamic section at offset {{.+}} contains 16 entries -# RELR: 0x0000000070000012 (AARCH64_AUTH_RELR) 0x[[ADDR2]] -# RELR-NEXT: 0x0000000070000011 (AARCH64_AUTH_RELRSZ) 24 (bytes) -# RELR-NEXT: 0x0000000070000013 (AARCH64_AUTH_RELRENT) 8 (bytes) - -## Decoded SHT_RELR section is same as UNPACKED, -## but contains only the relative relocations. -## Any relative relocations with odd offset or value wider than 32 bits stay in SHT_RELA. - -# RELR: Relocation section '.rela.dyn' at offset 0x[[ADDR1]] contains 6 entries: -# RELR-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend -# RELR-NEXT: 0000000000030460 0000000000000411 R_AARCH64_AUTH_RELATIVE 123456789a -# RELR-NEXT: 0000000000030468 0000000000000411 R_AARCH64_AUTH_RELATIVE ffffffedcba98766 -# RELR-NEXT: 0000000000030470 0000000000000411 R_AARCH64_AUTH_RELATIVE 8003043f -# RELR-NEXT: 0000000000030489 0000000000000411 R_AARCH64_AUTH_RELATIVE 4 -# RELR-NEXT: 0000000000030478 0000000100000244 R_AARCH64_AUTH_ABS64 0000000000000000 zed2 + 1111 -# RELR-NEXT: 0000000000030480 0000000200000244 R_AARCH64_AUTH_ABS64 0000000000000000 bar2 + 0 -# RELR-EMPTY: -# RELR-NEXT: Relocation section '.relr.auth.dyn' at offset 0x[[ADDR2]] contains 5 entries: -# RELR-NEXT: Index: Entry Address Symbolic Address -# RELR-NEXT: 0000: 0000000000030440 0000000000030440 $d.0 -# RELR-NEXT: 0001: 000000000000000f 0000000000030448 $d.0 + 0x8 -# RELR-NEXT: 0000000000030450 $d.0 + 0x10 -# RELR-NEXT: 0000000000030458 $d.0 + 0x18 -# RELR-NEXT: 0002: 0000000000030492 0000000000030492 $d.0 + 0x52 +# RUN: ld.lld -pie %t.o %t.a.so -o %t.pie +# RUN: llvm-readelf -S -d -r -x .test %t.pie | FileCheck --check-prefixes=PIE,HEX %s + +# PIE: Section Headers: +# PIE-NEXT: Name Type Address Off Size ES Flg Lk Inf Al +# PIE: .rela.dyn RELA {{0*}}[[#%x,ADDR1:]] +# PIE-SAME: {{0*}}[[#ADDR1]] 000108 18 A 1 0 8 + +# PIE: Relocation section '.rela.dyn' at offset 0x[[#ADDR1]] contains 11 entries: +# PIE-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# PIE-NEXT: 0000000000030470 0000000000000411 R_AARCH64_AUTH_RELATIVE 1 +# PIE-NEXT: 0000000000030478 0000000000000411 R_AARCH64_AUTH_RELATIVE 30472 +# PIE-NEXT: 0000000000030480 0000000000000411 R_AARCH64_AUTH_RELATIVE fffffffffffffffd +# PIE-NEXT: 0000000000030488 0000000000000411 R_AARCH64_AUTH_RELATIVE 12345678 +# PIE-NEXT: 0000000000030490 0000000000000411 R_AARCH64_AUTH_RELATIVE 123456789a +# PIE-NEXT: 0000000000030498 0000000000000411 R_AARCH64_AUTH_RELATIVE ffffffedcba98766 +# PIE-NEXT: 00000000000304a0 0000000000000411 R_AARCH64_AUTH_RELATIVE 8003046f +# PIE-NEXT: 00000000000304b9 0000000000000411 R_AARCH64_AUTH_RELATIVE 4 +# PIE-NEXT: 00000000000304c2 0000000000000411 R_AARCH64_AUTH_RELATIVE 30475 +# PIE-NEXT: 00000000000304a8 0000000100000244 R_AARCH64_AUTH_ABS64 0000000000000000 zed2 + 1111 +# PIE-NEXT: 00000000000304b0 0000000200000244 R_AARCH64_AUTH_ABS64 0000000000000000 bar2 + 0 # HEX: Hex dump of section '.test': -# HEX-NEXT: 0x00030440 01000000 2a000020 42040300 2b000000 -## ^^^^^^^^ Implicit val = 1 = __ehdr_start + 1 +# HEX-NEXT: 0x00030470 00000000 2a000020 00000000 2b000000 ## ^^^^ Discr = 42 ## ^^ Key (bits 5..6) = DA -## ^^^^^^^^ Implicit val = 0x30442 = 0x30440 + 2 = .test + 2 ## ^^^^ Discr = 43 ## ^^ Key (bits 5..6) = IA -# HEX-NEXT: 0x00030450 fdffffff 2c000080 78563412 2d000020 -## ^^^^^^^^ Implicit val = -3 = __ehdr_start - 3 +# HEX-NEXT: 0x00030480 00000000 2c000080 00000000 2d000020 ## ^^^^ Discr = 44 ## ^^ Key 
(bits 5..6) = IA ## ^^ Addr diversity (bit 7) = true -## ^^^^^^^^ Implicit val = 0x12345678 = __ehdr_start + 0x12345678 ## ^^^^ Discr = 45 ## ^^ Key (bits 5..6) = DA -# HEX-NEXT: 0x00030460 00000000 2e000020 00000000 2f000020 -## ^^^^^^^^ No implicit val (rela reloc due val wider than 32 bits) +# HEX-NEXT: 0x00030490 00000000 2e000020 00000000 2f000020 ## ^^^^ Discr = 46 ## ^^ Key (bits 5..6) = DA -## ^^^^^^^^ No implicit val (rela reloc due to val wider than 32 bits) ## ^^^^ Discr = 47 ## ^^ Key (bits 5..6) = DA -# HEX-NEXT: 0x00030470 00000000 30000020 00000000 31000020 -## ^^^^^^^^ No implicit val (rela reloc due val wider than 32 bits) +# HEX-NEXT: 0x000304a0 00000000 30000020 00000000 31000020 ## ^^^^ Discr = 48 ## ^^ Key (bits 5..6) = DA -## ^^^^^^^^ No implicit val (rela reloc due to a preemptible symbol) ## ^^^^ Discr = 49 ## ^^ Key (bits 5..6) = DA -# HEX-NEXT: 0x00030480 00000000 32000000 77000000 00330000 -## ^^^^^^^^ No implicit val (rela reloc due to a preemptible symbol) +# HEX-NEXT: 0x000304b0 00000000 32000000 77000000 00330000 ## ^^^^ Discr = 50 ## ^^ Key (bits 5..6) = IA -## ^^^^^^ ^^ No implicit val (rela reloc due to odd offset) ## ^^^^ Discr = 51 -# HEX-NEXT: 0x00030490 20774504 03003400 0020{{\ }} +# HEX-NEXT: 0x000304c0 20770000 00003400 0020{{\ }} ## ^^ Key (bits 5..6) = DA -## ^^^^ ^^^^ Implicit val = 0x30445 = 0x30440 + 5 = .test + 5 ## ^^^^ Discr = 52 ## ^^ Key (bits 5..6) = DA -#--- main.s - .section .test, "aw" .p2align 3 .quad (__ehdr_start + 1)@AUTH(da,42) .quad (.test + 2)@AUTH(ia,43) .quad (__ehdr_start - 3)@AUTH(ia,44,addr) .quad (__ehdr_start + 0x12345678)@AUTH(da,45) -## Addend wider than 32 bits, not enough room for storing implicitly, would go to rela .quad (__ehdr_start + 0x123456789A)@AUTH(da,46) -## Negative addend wider than 32 bits, not enough room for storing implicitly, would go to rela .quad (__ehdr_start - 0x123456789A)@AUTH(da,47) -## INT32_MAX plus non-zero .test is wider than 32 bits, not enough room for storing implicitly, would go to rela .quad (.test + 0x7FFFFFFF)@AUTH(da,48) .quad (zed2 + 0x1111)@AUTH(da,49) .quad bar2@AUTH(ia,50) @@ -136,71 +106,3 @@ .quad (__ehdr_start + 4)@AUTH(da,51) .byte 0x77 .quad (.test + 5)@AUTH(da,52) - -#--- empty-relr.s - -## .relr.auth.dyn relocations that do not fit 32 bits are moved to .rela.dyn. -## In this case .relr.auth.dyn will be made empty, but -## removeUnusedSyntheticSections fails to remove the section. 
- -# RUN: llvm-mc -filetype=obj -triple=aarch64 empty-relr.s -o empty-relr.o -# RUN: ld.lld -pie -z pack-relative-relocs empty-relr.o -o empty-relr -# RUN: llvm-readelf -S -d -r empty-relr | FileCheck --check-prefixes=EMPTY-RELR %s - -# EMPTY-RELR: Section Headers: -# EMPTY-RELR-NEXT: Name Type Address Off Size ES Flg Lk Inf Al -# EMPTY-RELR: .rela.dyn RELA {{0*}}[[ADDR1:.+]] {{0*}}[[ADDR1]] 000018 18 A 0 0 8 -# EMPTY-RELR: .relr.auth.dyn AARCH64_AUTH_RELR {{0*}}[[ADDR2:.+]] {{0*}}[[ADDR2]] 000000 08 A 0 0 8 - -# EMPTY-RELR: Dynamic section at offset {{.+}} contains 12 entries -# EMPTY-RELR-NOT: (AARCH64_AUTH_RELR) -# EMPTY-RELR-NOT: (AARCH64_AUTH_RELRSZ) -# EMPTY-RELR-NOT: (AARCH64_AUTH_RELRENT) -# EMPTY-RELR: 0x0000000000000007 (RELA) 0x[[ADDR1]] -# EMPTY-RELR-NEXT: 0x0000000000000008 (RELASZ) 24 (bytes) -# EMPTY-RELR-NEXT: 0x0000000000000009 (RELAENT) 24 (bytes) - -# EMPTY-RELR: Relocation section '.rela.dyn' at offset {{.+}} contains 1 entries: -# EMPTY-RELR-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend -# EMPTY-RELR-NEXT: 0000000000030320 0000000000000411 R_AARCH64_AUTH_RELATIVE 8003031f -# EMPTY-RELR-EMPTY: -# EMPTY-RELR-NEXT: Relocation section '.relr.auth.dyn' at offset {{.+}} contains 0 entries: -# EMPTY-RELR-NEXT: Index: Entry Address Symbolic Address - -.section .test, "aw" -.p2align 3 -.quad (.test + 0x7FFFFFFF)@AUTH(da,42) - -#--- empty-rela.s - -## .relr.auth.dyn relocations that do not fit 32 bits are moved to .rela.dyn. -## If this scenario does not happen, .rela.dyn will remain empty, -## but removeUnusedSyntheticSections fails to remove the section. - -# RUN: llvm-mc -filetype=obj -triple=aarch64 empty-rela.s -o empty-rela.o -# RUN: ld.lld -pie -z pack-relative-relocs empty-rela.o -o empty-rela -# RUN: llvm-readelf -S -d -r empty-rela | FileCheck --check-prefixes=EMPTY-RELA %s - -# EMPTY-RELA: Section Headers: -# EMPTY-RELA-NEXT: Name Type Address Off Size ES Flg Lk Inf Al -# EMPTY-RELA: .rela.dyn RELA {{0*}}[[ADDR1:.+]] {{0*}}[[ADDR1]] 000000 18 A 0 0 8 -# EMPTY-RELA: .relr.auth.dyn AARCH64_AUTH_RELR {{0*}}[[ADDR2:.+]] {{0*}}[[ADDR2]] 000008 08 A 0 0 8 - -# EMPTY-RELA: Dynamic section at offset {{.+}} contains 12 entries -# EMPTY-RELA-NOT: (RELR) -# EMPTY-RELA-NOT: (RELRSZ) -# EMPTY-RELA-NOT: (RELRENT) -# EMPTY-RELA: 0x0000000070000012 (AARCH64_AUTH_RELR) 0x[[ADDR2]] -# EMPTY-RELA-NEXT: 0x0000000070000011 (AARCH64_AUTH_RELRSZ) 8 (bytes) -# EMPTY-RELA-NEXT: 0x0000000070000013 (AARCH64_AUTH_RELRENT) 8 (bytes) - -# EMPTY-RELA: Relocation section '.rela.dyn' at offset {{.+}} contains 0 entries: -# EMPTY-RELA-NEXT: Offset Info Type Symbol's Value Symbol's Name -# EMPTY-RELA-EMPTY: -# EMPTY-RELA-NEXT: Relocation section '.relr.auth.dyn' at offset {{.+}} contains 1 entries: -# EMPTY-RELA-NEXT: Index: Entry Address Symbolic Address -# EMPTY-RELA-NEXT: 0000: 0000000000030310 0000000000030310 $d.0 - -.section .test, "aw" -.p2align 3 -.quad (.test + 0x12345678)@AUTH(da,42) -- cgit v1.1 From a294e896535e76db0279bd86d2d9202646d6345d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 7 Jun 2024 23:34:17 -0700 Subject: [RISCV] Replace VPseudoBinaryFV_VV with VPseudoBinaryV_VV. 
NFC

---
 llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 1a5fc1c..d081433 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -2245,11 +2245,6 @@ multiclass VPseudoBinaryV_VV_RM;
 }
 
-// Similar to VPseudoBinaryV_VV, but uses MxListF.
-multiclass VPseudoBinaryFV_VV {
-  defm _VV : VPseudoBinary;
-}
-
 multiclass VPseudoBinaryFV_VV_RM {
   defm _VV : VPseudoBinaryRoundingMode;
@@ -2901,7 +2896,7 @@ multiclass VPseudoVALU_VV_VX {
 multiclass VPseudoVSGNJ_VV_VF {
   foreach m = MxListF in {
     foreach e = SchedSEWSet.val in
-      defm "" : VPseudoBinaryFV_VV,
+      defm "" : VPseudoBinaryV_VV,
                 SchedBinary<"WriteVFSgnjV", "ReadVFSgnjV", "ReadVFSgnjV", m.MX,
                             e, forceMergeOpRead=true>;
   }
@@ -2918,7 +2913,7 @@ multiclass VPseudoVSGNJ_VV_VF {
 multiclass VPseudoVMAX_VV_VF {
   foreach m = MxListF in {
     foreach e = SchedSEWSet.val in
-      defm "" : VPseudoBinaryFV_VV,
+      defm "" : VPseudoBinaryV_VV,
                 SchedBinary<"WriteVFMinMaxV", "ReadVFMinMaxV", "ReadVFMinMaxV",
                             m.MX, e, forceMergeOpRead=true>;
   }
--
cgit v1.1


From 3f0f2cdcf51b5d87e616a3bf8bf73dacdfb134e2 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 7 Jun 2024 23:57:43 -0700
Subject: [RISCV] Remove unnecessary setting of parameter with same default value. NFC

---
 llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index d081433..1af3732 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -3291,21 +3291,21 @@ multiclass VPseudoTernaryW_VV {
 multiclass VPseudoTernaryW_VV_RM {
   defvar constraint = "@earlyclobber $rd";
   defm _VV : VPseudoTernaryWithPolicyRoundingMode;
 }
 
 multiclass VPseudoTernaryW_VX {
   defvar constraint = "@earlyclobber $rd";
   defm "_VX" : VPseudoTernaryWithPolicy;
+                                        constraint, TargetConstraintType=3>;
 }
 
 multiclass VPseudoTernaryW_VF_RM {
   defvar constraint = "@earlyclobber $rd";
   defm "_V" # f.FX : VPseudoTernaryWithPolicyRoundingMode;
 }
--
cgit v1.1


From 68852812ff00d915bc96816a1454eb7d25cb0cb5 Mon Sep 17 00:00:00 2001
From: Ben Barham
Date: Sat, 8 Jun 2024 00:32:35 -0700
Subject: [Support] Do not use `llvm::size` in `getLoopPreheader` (#94540)

`BlockT *LoopBase::getLoopPreheader()` was changed in
7243607867393a2b8ccd477e95e6f62d00f3206f to use `llvm::size` rather than
checking that `child_begin() + 1 == child_end()`. `llvm::size` requires that
`std::distance` be O(1) and hence that clients support random access. Use
`llvm::hasSingleElement` instead.

---
 llvm/include/llvm/Support/GenericLoopInfoImpl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Support/GenericLoopInfoImpl.h b/llvm/include/llvm/Support/GenericLoopInfoImpl.h
index 1e0d0ee..d190227 100644
--- a/llvm/include/llvm/Support/GenericLoopInfoImpl.h
+++ b/llvm/include/llvm/Support/GenericLoopInfoImpl.h
@@ -208,7 +208,7 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopPreheader() const {
     return nullptr;
 
   // Make sure there is only one exit out of the preheader.
-  if (llvm::size(llvm::children<BlockT *>(Out)) != 1)
+  if (!llvm::hasSingleElement(llvm::children<BlockT *>(Out)))
     return nullptr; // Multiple exits from the block, must not be a preheader.
 
   // The predecessor has exactly one successor, so it is a preheader.
--
cgit v1.1


From 7f5d1f116477388067b1fd865e56eb3868993998 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson
Date: Sat, 8 Jun 2024 09:37:23 +0200
Subject: [SystemZ] Fix handling of triples.

Some Ubuntu builds were broken after 20d497c "[Driver] Remove unneeded
*-linux-gnu after D158183". This patch by Fangrui Song fixes this by adding
special handling to config.guess.

---
 llvm/cmake/config.guess | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/llvm/cmake/config.guess b/llvm/cmake/config.guess
index f489623..2444ed7 100644
--- a/llvm/cmake/config.guess
+++ b/llvm/cmake/config.guess
@@ -4,7 +4,7 @@
 # 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
 # 2011 Free Software Foundation, Inc.
 
-timestamp='2011-08-20'
+timestamp='2024-06-07'
 
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
@@ -1028,7 +1028,11 @@ EOF
 	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     s390:Linux:*:* | s390x:Linux:*:*)
-	echo ${UNAME_MACHINE}-ibm-linux
+	if [ "$(grep -Ei 'debian|ubuntu' /etc/lsb-release)" ]; then
+	    echo ${UNAME_MACHINE}-linux-gnu
+	else
+	    echo ${UNAME_MACHINE}-ibm-linux
+	fi
 	exit ;;
     sh64*:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
--
cgit v1.1


From 6b3e0002dfe0029487fc2f8f11f5d5fdc07a5e11 Mon Sep 17 00:00:00 2001
From: Matthias Springer
Date: Sat, 8 Jun 2024 10:26:17 +0200
Subject: [mlir][Transforms][NFC] `GreedyPatternRewriteDriver`: Use composition instead of inheritance (#92785)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit simplifies the design of the `GreedyPatternRewriteDriver` class.
This class used to inherit from both `PatternRewriter` and
`RewriterBase::Listener` and then attached itself as a listener. In the new
design, the class has a `PatternRewriter` field instead of inheriting from
`PatternRewriter`, which is generally preferred in object-oriented
programming.

---------

Co-authored-by: Markus Böck

---
 mlir/include/mlir/IR/PatternMatch.h                |  1 +
 .../Utils/GreedyPatternRewriteDriver.cpp           | 40 ++++++++++++----------
 2 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/mlir/include/mlir/IR/PatternMatch.h b/mlir/include/mlir/IR/PatternMatch.h
index 2562301..ed7b9ec 100644
--- a/mlir/include/mlir/IR/PatternMatch.h
+++ b/mlir/include/mlir/IR/PatternMatch.h
@@ -784,6 +784,7 @@ public:
 /// place.
 class PatternRewriter : public RewriterBase {
 public:
+  explicit PatternRewriter(MLIRContext *ctx) : RewriterBase(ctx) {}
   using RewriterBase::RewriterBase;
 
   /// A hook used to indicate if the pattern rewriter can recover from failure
diff --git a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
index cfd4f9c0..597cb29 100644
--- a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
+++ b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
@@ -319,8 +319,7 @@ private:
 /// This abstract class manages the worklist and contains helper methods for
 /// rewriting ops on the worklist. Derived classes specify how ops are added
 /// to the worklist in the beginning.
-class GreedyPatternRewriteDriver : public PatternRewriter,
-                                   public RewriterBase::Listener {
+class GreedyPatternRewriteDriver : public RewriterBase::Listener {
 protected:
   explicit GreedyPatternRewriteDriver(MLIRContext *ctx,
                                       const FrozenRewritePatternSet &patterns,
@@ -339,7 +338,8 @@ protected:
   /// Notify the driver that the specified operation was inserted.
Update the /// worklist as needed: The operation is enqueued depending on scope and /// strict mode. - void notifyOperationInserted(Operation *op, InsertPoint previous) override; + void notifyOperationInserted(Operation *op, + OpBuilder::InsertPoint previous) override; /// Notify the driver that the specified operation was removed. Update the /// worklist as needed: The operation and its children are removed from the @@ -354,6 +354,10 @@ protected: /// reached. Return `true` if any IR was changed. bool processWorklist(); + /// The pattern rewriter that is used for making IR modifications and is + /// passed to rewrite patterns. + PatternRewriter rewriter; + /// The worklist for this transformation keeps track of the operations that /// need to be (re)visited. #ifdef MLIR_GREEDY_REWRITE_RANDOMIZER_SEED @@ -407,7 +411,7 @@ private: GreedyPatternRewriteDriver::GreedyPatternRewriteDriver( MLIRContext *ctx, const FrozenRewritePatternSet &patterns, const GreedyRewriteConfig &config) - : PatternRewriter(ctx), config(config), matcher(patterns) + : rewriter(ctx), config(config), matcher(patterns) #if MLIR_ENABLE_EXPENSIVE_PATTERN_API_CHECKS // clang-format off , expensiveChecks( @@ -423,9 +427,9 @@ GreedyPatternRewriteDriver::GreedyPatternRewriteDriver( #if MLIR_ENABLE_EXPENSIVE_PATTERN_API_CHECKS // Send IR notifications to the debug handler. This handler will then forward // all notifications to this GreedyPatternRewriteDriver. - setListener(&expensiveChecks); + rewriter.setListener(&expensiveChecks); #else - setListener(this); + rewriter.setListener(this); #endif // MLIR_ENABLE_EXPENSIVE_PATTERN_API_CHECKS } @@ -473,7 +477,7 @@ bool GreedyPatternRewriteDriver::processWorklist() { // If the operation is trivially dead - remove it. if (isOpTriviallyDead(op)) { - eraseOp(op); + rewriter.eraseOp(op); changed = true; LLVM_DEBUG(logResultWithLine("success", "operation is trivially dead")); @@ -505,8 +509,8 @@ bool GreedyPatternRewriteDriver::processWorklist() { // Op results can be replaced with `foldResults`. assert(foldResults.size() == op->getNumResults() && "folder produced incorrect number of results"); - OpBuilder::InsertionGuard g(*this); - setInsertionPoint(op); + OpBuilder::InsertionGuard g(rewriter); + rewriter.setInsertionPoint(op); SmallVector replacements; bool materializationSucceeded = true; for (auto [ofr, resultType] : @@ -519,7 +523,7 @@ bool GreedyPatternRewriteDriver::processWorklist() { } // Materialize Attributes as SSA values. 
Operation *constOp = op->getDialect()->materializeConstant( - *this, ofr.get(), resultType, op->getLoc()); + rewriter, ofr.get(), resultType, op->getLoc()); if (!constOp) { // If materialization fails, cleanup any operations generated for @@ -532,7 +536,7 @@ bool GreedyPatternRewriteDriver::processWorklist() { replacementOps.insert(replacement.getDefiningOp()); } for (Operation *op : replacementOps) { - eraseOp(op); + rewriter.eraseOp(op); } materializationSucceeded = false; @@ -547,7 +551,7 @@ bool GreedyPatternRewriteDriver::processWorklist() { } if (materializationSucceeded) { - replaceOp(op, replacements); + rewriter.replaceOp(op, replacements); changed = true; LLVM_DEBUG(logSuccessfulFolding(dumpRootOp)); #if MLIR_ENABLE_EXPENSIVE_PATTERN_API_CHECKS @@ -608,7 +612,7 @@ bool GreedyPatternRewriteDriver::processWorklist() { #endif // MLIR_ENABLE_EXPENSIVE_PATTERN_API_CHECKS LogicalResult matchResult = - matcher.matchAndRewrite(op, *this, canApply, onFailure, onSuccess); + matcher.matchAndRewrite(op, rewriter, canApply, onFailure, onSuccess); if (succeeded(matchResult)) { LLVM_DEBUG(logResultWithLine("success", "pattern matched")); @@ -664,8 +668,8 @@ void GreedyPatternRewriteDriver::notifyBlockErased(Block *block) { config.listener->notifyBlockErased(block); } -void GreedyPatternRewriteDriver::notifyOperationInserted(Operation *op, - InsertPoint previous) { +void GreedyPatternRewriteDriver::notifyOperationInserted( + Operation *op, OpBuilder::InsertPoint previous) { LLVM_DEBUG({ logger.startLine() << "** Insert : '" << op->getName() << "'(" << op << ")\n"; @@ -822,7 +826,7 @@ private: LogicalResult RegionPatternRewriteDriver::simplify(bool *changed) && { bool continueRewrites = false; int64_t iteration = 0; - MLIRContext *ctx = getContext(); + MLIRContext *ctx = rewriter.getContext(); do { // Check if the iteration limit was reached. if (++iteration > config.maxIterations && @@ -834,7 +838,7 @@ LogicalResult RegionPatternRewriteDriver::simplify(bool *changed) && { // `OperationFolder` CSE's constant ops (and may move them into parents // regions to enable more aggressive CSE'ing). - OperationFolder folder(getContext(), this); + OperationFolder folder(ctx, this); auto insertKnownConstant = [&](Operation *op) { // Check for existing constants when populating the worklist. This avoids // accidentally reversing the constant order during processing. @@ -872,7 +876,7 @@ LogicalResult RegionPatternRewriteDriver::simplify(bool *changed) && { // After applying patterns, make sure that the CFG of each of the // regions is kept up to date. if (config.enableRegionSimplification) - continueRewrites |= succeeded(simplifyRegions(*this, region)); + continueRewrites |= succeeded(simplifyRegions(rewriter, region)); }, {®ion}, iteration); } while (continueRewrites); -- cgit v1.1 From 9ddc014f1a588608af1f08051d084c5839a41a80 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Sat, 8 Jun 2024 11:20:31 +0200 Subject: [clang] Report erroneous floating point results in _Complex math (#90588) Use handleFloatFloatBinOp to properly diagnose NaN results and divisions by zero. 
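For illustration, here is a minimal reproducer (hand-written for this digest,
not taken from the patch; it relies on Clang's `_Complex` and GNU
imaginary-constant extensions, as the updated test does):

```cpp
// Previously this folded silently; after this change constant evaluation
// rejects it and points at the offending operation.
constexpr _Complex double z = (1.0 + 1.0j) / 0.0;
// error: constexpr variable 'z' must be initialized by a constant expression
// note: division by zero
```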
Fixes #84871 --- clang/lib/AST/ExprConstant.cpp | 27 +++++++++++---- clang/test/SemaCXX/complex-folding.cpp | 61 +++++++++++++++++++--------------- 2 files changed, 55 insertions(+), 33 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index f1aa19e..86fb396 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -15209,11 +15209,21 @@ bool ComplexExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) { APFloat &ResI = Result.getComplexFloatImag(); if (LHSReal) { assert(!RHSReal && "Cannot have two real operands for a complex op!"); - ResR = A * C; - ResI = A * D; + ResR = A; + ResI = A; + // ResR = A * C; + // ResI = A * D; + if (!handleFloatFloatBinOp(Info, E, ResR, BO_Mul, C) || + !handleFloatFloatBinOp(Info, E, ResI, BO_Mul, D)) + return false; } else if (RHSReal) { - ResR = C * A; - ResI = C * B; + // ResR = C * A; + // ResI = C * B; + ResR = C; + ResI = C; + if (!handleFloatFloatBinOp(Info, E, ResR, BO_Mul, A) || + !handleFloatFloatBinOp(Info, E, ResI, BO_Mul, B)) + return false; } else { // In the fully general case, we need to handle NaNs and infinities // robustly. @@ -15289,8 +15299,13 @@ bool ComplexExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) { APFloat &ResR = Result.getComplexFloatReal(); APFloat &ResI = Result.getComplexFloatImag(); if (RHSReal) { - ResR = A / C; - ResI = B / C; + ResR = A; + ResI = B; + // ResR = A / C; + // ResI = B / C; + if (!handleFloatFloatBinOp(Info, E, ResR, BO_Div, C) || + !handleFloatFloatBinOp(Info, E, ResI, BO_Div, C)) + return false; } else { if (LHSReal) { // No real optimizations we can do here, stub out with zero. diff --git a/clang/test/SemaCXX/complex-folding.cpp b/clang/test/SemaCXX/complex-folding.cpp index 054f159..7bfd36f 100644 --- a/clang/test/SemaCXX/complex-folding.cpp +++ b/clang/test/SemaCXX/complex-folding.cpp @@ -59,41 +59,48 @@ static_assert((1.25 / (0.25 - 0.75j)) == (0.5 + 1.5j)); // Test that infinities are preserved, don't turn into NaNs, and do form zeros // when the divisor. 
+constexpr _Complex float InfC = {1.0, __builtin_inf()}; +constexpr _Complex float InfInf = __builtin_inf() + InfC; +static_assert(__real__(InfInf) == __builtin_inf()); +static_assert(__imag__(InfInf) == __builtin_inf()); +static_assert(__builtin_isnan(__real__(InfInf * InfInf))); +static_assert(__builtin_isinf_sign(__imag__(InfInf * InfInf)) == 1); + static_assert(__builtin_isinf_sign(__real__((__builtin_inf() + 1.0j) * 1.0)) == 1); -static_assert(__builtin_isinf_sign(__imag__((1.0 + __builtin_inf() * 1.0j) * 1.0)) == 1); +static_assert(__builtin_isinf_sign(__imag__((1.0 + InfC) * 1.0)) == 1); static_assert(__builtin_isinf_sign(__real__(1.0 * (__builtin_inf() + 1.0j))) == 1); -static_assert(__builtin_isinf_sign(__imag__(1.0 * (1.0 + __builtin_inf() * 1.0j))) == 1); - +static_assert(__builtin_isinf_sign(__imag__(1.0 * (1.0 + InfC))) == 1); static_assert(__builtin_isinf_sign(__real__((__builtin_inf() + 1.0j) * (1.0 + 1.0j))) == 1); static_assert(__builtin_isinf_sign(__real__((1.0 + 1.0j) * (__builtin_inf() + 1.0j))) == 1); static_assert(__builtin_isinf_sign(__real__((__builtin_inf() + 1.0j) * (__builtin_inf() + 1.0j))) == 1); - -static_assert(__builtin_isinf_sign(__real__((1.0 + __builtin_inf() * 1.0j) * (1.0 + 1.0j))) == -1); -static_assert(__builtin_isinf_sign(__imag__((1.0 + __builtin_inf() * 1.0j) * (1.0 + 1.0j))) == 1); -static_assert(__builtin_isinf_sign(__real__((1.0 + 1.0j) * (1.0 + __builtin_inf() * 1.0j))) == -1); -static_assert(__builtin_isinf_sign(__imag__((1.0 + 1.0j) * (1.0 + __builtin_inf() * 1.0j))) == 1); - -static_assert(__builtin_isinf_sign(__real__((1.0 + __builtin_inf() * 1.0j) * (1.0 + __builtin_inf() * 1.0j))) == -1); -static_assert(__builtin_isinf_sign(__real__((__builtin_inf() + __builtin_inf() * 1.0j) * (__builtin_inf() + __builtin_inf() * 1.0j))) == -1); - +static_assert(__builtin_isinf_sign(__real__((1.0 + InfC) * (1.0 + 1.0j))) == -1); +static_assert(__builtin_isinf_sign(__imag__((1.0 + InfC) * (1.0 + 1.0j))) == 1); +static_assert(__builtin_isinf_sign(__real__((1.0 + 1.0j) * (1.0 + InfC))) == -1); +static_assert(__builtin_isinf_sign(__imag__((1.0 + 1.0j) * (1.0 + InfC))) == 1); +static_assert(__builtin_isinf_sign(__real__((1.0 + InfC) * (1.0 + InfC))) == -1); +static_assert(__builtin_isinf_sign(__real__(InfInf * InfInf)) == 0); static_assert(__builtin_isinf_sign(__real__((__builtin_inf() + 1.0j) / (1.0 + 1.0j))) == 1); -static_assert(__builtin_isinf_sign(__imag__(1.0 + (__builtin_inf() * 1.0j) / (1.0 + 1.0j))) == 1); -static_assert(__builtin_isinf_sign(__imag__((__builtin_inf() + __builtin_inf() * 1.0j) / (1.0 + 1.0j))) == 1); +static_assert(__builtin_isinf_sign(__imag__(1.0 + (InfC) / (1.0 + 1.0j))) == 1); +static_assert(__builtin_isinf_sign(__imag__((InfInf) / (1.0 + 1.0j))) == 0); static_assert(__builtin_isinf_sign(__real__((__builtin_inf() + 1.0j) / 1.0)) == 1); -static_assert(__builtin_isinf_sign(__imag__(1.0 + (__builtin_inf() * 1.0j) / 1.0)) == 1); -static_assert(__builtin_isinf_sign(__imag__((__builtin_inf() + __builtin_inf() * 1.0j) / 1.0)) == 1); - +static_assert(__builtin_isinf_sign(__imag__(1.0 + (InfC) / 1.0)) == 1); +static_assert(__builtin_isinf_sign(__imag__((InfInf) / 1.0)) == 1); static_assert(((1.0 + 1.0j) / (__builtin_inf() + 1.0j)) == (0.0 + 0.0j)); -static_assert(((1.0 + 1.0j) / (1.0 + __builtin_inf() * 1.0j)) == (0.0 + 0.0j)); -static_assert(((1.0 + 1.0j) / (__builtin_inf() + __builtin_inf() * 1.0j)) == (0.0 + 0.0j)); +static_assert(((1.0 + 1.0j) / (1.0 + InfC)) == (0.0 + 0.0j)); +static_assert(((1.0 + 1.0j) / (InfInf)) == (0.0 + 0.0j)); 
 static_assert(((1.0 + 1.0j) / __builtin_inf()) == (0.0 + 0.0j));
-
+static_assert(1.0j / 0.0 == 1); // expected-error {{static assertion}} \
+                                // expected-note {{division by zero}}
 static_assert(__builtin_isinf_sign(__real__((1.0 + 1.0j) / (0.0 + 0.0j))) == 1);
-static_assert(__builtin_isinf_sign(__real__((1.0 + 1.0j) / 0.0)) == 1);
-
+static_assert(__builtin_isinf_sign(__real__((1.0 + 1.0j) / 0.0)) == 1); // expected-error {{static assertion}} \
+                                                                        // expected-note {{division by zero}}
 static_assert(__builtin_isinf_sign(__real__((__builtin_inf() + 1.0j) / (0.0 + 0.0j))) == 1);
-static_assert(__builtin_isinf_sign(__imag__((1.0 + __builtin_inf() * 1.0j) / (0.0 + 0.0j))) == 1);
-static_assert(__builtin_isinf_sign(__imag__((__builtin_inf() + __builtin_inf() * 1.0j) / (0.0 + 0.0j))) == 1);
-static_assert(__builtin_isinf_sign(__real__((__builtin_inf() + 1.0j) / 0.0)) == 1);
-static_assert(__builtin_isinf_sign(__imag__((1.0 + __builtin_inf() * 1.0j) / 0.0)) == 1);
-static_assert(__builtin_isinf_sign(__imag__((__builtin_inf() + __builtin_inf() * 1.0j) / 0.0)) == 1);
+static_assert(__builtin_isinf_sign(__imag__((1.0 + InfC) / (0.0 + 0.0j))) == 1);
+static_assert(__builtin_isinf_sign(__imag__((InfInf) / (0.0 + 0.0j))) == 1);
+static_assert(__builtin_isinf_sign(__real__((__builtin_inf() + 1.0j) / 0.0)) == 1); // expected-error {{static assertion}} \
+                                                                                    // expected-note {{division by zero}}
+static_assert(__builtin_isinf_sign(__imag__((1.0 + InfC) / 0.0)) == 1); // expected-error {{static assertion}} \
+                                                                        // expected-note {{division by zero}}
+static_assert(__builtin_isinf_sign(__imag__((InfInf) / 0.0)) == 1); // expected-error {{static assertion}} \
+                                                                    // expected-note {{division by zero}}
+
--
cgit v1.1


From 25506f48643b65e48c6bd501855589fff8983933 Mon Sep 17 00:00:00 2001
From: Quentin Colombet
Date: Sat, 8 Jun 2024 11:31:13 +0200
Subject: [SDISel][Combine] Constant fold FP16_TO_FP (#94790)

In some cases, constants can survive early constant folding because they are
hidden behind several layers of type changes. E.g., consider the following
sequence (extracted from the arm test that this commit changes):
```
t2: v1f16 = BUILD_VECTOR ConstantFP:f16<APFloat(0)>
  t4: v1f16 = insert_vector_elt t2, ConstantFP:f16<APFloat(0)>, Constant:i32<0>
t5: f16 = bitcast t4
t6: f32 = fp_extend t5
```
Because the constant (APFloat(0)) is hidden behind a <1 x ty> type, all the
constant folding that normally happens for scalar nodes when using
`SelectionDAG::getNode` is blocked. As a result the constant manages to
survive as an actual conversion instruction down to the select phase:
```
t11: f32 = fp16_to_fp Constant:i32<0>
```
With the change in this patch, we try to do constant folding one more time
during dag combine, which in the motivating example results in the much
better sequence:
```
t7: ch = CopyToReg t0, Register:f32 %0, ConstantFP:f32<0.000000e+00>
```

Note: I'm sure we have this problem in a lot of other places. Generally
speaking I believe SDISel is not that good with <1 x ty> compared to pure
scalar. However, I only changed what I could easily test.
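For reference, the fix itself is small: at the end of
`DAGCombiner::visitFP16_TO_FP`, instead of giving up, we ask SelectionDAG to
constant fold the node one more time. `FoldConstantArithmetic` returns a null
`SDValue` when the operand is not a constant, so the combine stays a no-op in
the common case (sketch below; the surrounding combines are elided):

```cpp
// Sometimes constants survive very late in the pipeline, e.g. wrapped in a
// <1 x f16> value. Try one last time to fold them away.
SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
                                            N->getValueType(0), {N0});
return Folded;
```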
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 ++++++- llvm/test/CodeGen/AMDGPU/clamp-modifier.ll | 3 +-- llvm/test/CodeGen/AMDGPU/select-phi-s16-fp.ll | 3 +-- llvm/test/CodeGen/ARM/arm-half-promote.ll | 4 +--- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2d5968b..70b3c7d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -26586,7 +26586,12 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { } } - return SDValue(); + // Sometimes constants manage to survive very late in the pipeline, e.g., + // because they are wrapped inside the <1 x f16> type. Try one last time to + // get rid of them. + SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), + N->getValueType(0), {N0}); + return Folded; } SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) { diff --git a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll index 0a0179e..84bd9b6 100644 --- a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll +++ b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll @@ -1489,9 +1489,8 @@ define amdgpu_kernel void @v_no_clamp_add_src_v2f16_f16_src(ptr addrspace(1) %ou ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b64 s[4:5], s[2:3] ; SI-NEXT: buffer_load_ushort v1, v[1:2], s[4:7], 0 addr64 -; SI-NEXT: v_cvt_f32_f16_e64 v3, s6 clamp +; SI-NEXT: v_cvt_f16_f32_e32 v3, 0 ; SI-NEXT: s_mov_b64 s[2:3], s[6:7] -; SI-NEXT: v_cvt_f16_f32_e32 v3, v3 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-NEXT: v_add_f32_e32 v1, 1.0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/select-phi-s16-fp.ll b/llvm/test/CodeGen/AMDGPU/select-phi-s16-fp.ll index 0d6e987..ba04cdb 100644 --- a/llvm/test/CodeGen/AMDGPU/select-phi-s16-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/select-phi-s16-fp.ll @@ -14,9 +14,8 @@ define void @phi_vec1half_to_f32_with_const_folding(ptr addrspace(1) %dst) #0 { ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s4, 0 -; CHECK-NEXT: v_cvt_f32_f16_e64 v2, s4 ; CHECK-NEXT: ; %bb.1: ; %bb -; CHECK-NEXT: v_cvt_f16_f32_e64 v2, v2 +; CHECK-NEXT: v_cvt_f16_f32_e64 v2, s4 ; CHECK-NEXT: s_mov_b32 s7, 0xf000 ; CHECK-NEXT: s_mov_b32 s6, 0 ; CHECK-NEXT: s_mov_b32 s4, s6 diff --git a/llvm/test/CodeGen/ARM/arm-half-promote.ll b/llvm/test/CodeGen/ARM/arm-half-promote.ll index a5fafd4..e1ab75b 100644 --- a/llvm/test/CodeGen/ARM/arm-half-promote.ll +++ b/llvm/test/CodeGen/ARM/arm-half-promote.ll @@ -116,9 +116,7 @@ define fastcc { <8 x half>, <8 x half> } @f3() { define void @extract_insert(ptr %dst) optnone noinline { ; CHECK-LABEL: extract_insert: -; CHECK: movs r1, #0 -; CHECK: vmov s0, r1 -; CHECK: vcvtb.f32.f16 s0, s0 +; CHECK: vmov.i32 d0, #0x0 ; CHECK: vcvtb.f16.f32 s0, s0 ; CHECK: vmov r1, s0 ; CHECK: strh r1, [r0] -- cgit v1.1 From cac7821438f625d6c8a36dd9363f9acd3a3d93de Mon Sep 17 00:00:00 2001 From: Marc Auberer Date: Sat, 8 Jun 2024 12:29:01 +0200 Subject: [compiler-rt] Replace deprecated aligned_storage with aligned byte array (#94171) `std::aligned_storage` is deprecated with C++23, see [here](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2021/p1413r3.pdf). This replaces the usages of `std::aligned_storage` within compiler-rt with an aligned `std::byte` array. I will provide patches for other subcomponents as well. 
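The mechanical rewrite applied throughout is sketched below (`Widget` is a
placeholder type invented for this illustration, not something from the patch;
C++17 is assumed for `std::byte`):

```cpp
#include <cstddef> // std::byte
#include <new>     // placement new

struct Widget { int x; };

void demo() {
  // Before (deprecated in C++23):
  //   typename std::aligned_storage<sizeof(Widget), alignof(Widget)>::type storage;
  // After: a byte array with the same size and alignment guarantees.
  alignas(Widget) std::byte storage[sizeof(Widget)];
  Widget *w = new (&storage) Widget{42}; // construct in place, as before
  w->~Widget();                          // lifetime still managed manually
}
```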
--- .../xray/tests/unit/function_call_trie_test.cpp | 8 +++---- .../lib/xray/tests/unit/profile_collector_test.cpp | 4 ++-- .../lib/xray/tests/unit/segmented_array_test.cpp | 8 +++---- compiler-rt/lib/xray/tests/unit/test_helpers.cpp | 3 +-- compiler-rt/lib/xray/xray_fdr_logging.cpp | 19 ++++++--------- compiler-rt/lib/xray/xray_function_call_trie.h | 20 +++++++--------- compiler-rt/lib/xray/xray_profile_collector.cpp | 27 ++++++++++------------ compiler-rt/lib/xray/xray_profiling.cpp | 13 ++++------- compiler-rt/lib/xray/xray_segmented_array.h | 3 +-- compiler-rt/test/tsan/custom_mutex4.cpp | 8 +++---- compiler-rt/test/tsan/custom_mutex5.cpp | 8 +++---- 11 files changed, 50 insertions(+), 71 deletions(-) diff --git a/compiler-rt/lib/xray/tests/unit/function_call_trie_test.cpp b/compiler-rt/lib/xray/tests/unit/function_call_trie_test.cpp index c90d663..b058e3c 100644 --- a/compiler-rt/lib/xray/tests/unit/function_call_trie_test.cpp +++ b/compiler-rt/lib/xray/tests/unit/function_call_trie_test.cpp @@ -310,16 +310,14 @@ TEST(FunctionCallTrieTest, MergeInto) { TEST(FunctionCallTrieTest, PlacementNewOnAlignedStorage) { profilingFlags()->setDefaults(); - typename std::aligned_storage::type - AllocatorsStorage; + alignas(FunctionCallTrie::Allocators) + std::byte AllocatorsStorage[sizeof(FunctionCallTrie::Allocators)]; new (&AllocatorsStorage) FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators()); auto *A = reinterpret_cast(&AllocatorsStorage); - typename std::aligned_storage::type FCTStorage; + alignas(FunctionCallTrie) std::byte FCTStorage[sizeof(FunctionCallTrie)]; new (&FCTStorage) FunctionCallTrie(*A); auto *T = reinterpret_cast(&FCTStorage); diff --git a/compiler-rt/lib/xray/tests/unit/profile_collector_test.cpp b/compiler-rt/lib/xray/tests/unit/profile_collector_test.cpp index eab5579..da50642 100644 --- a/compiler-rt/lib/xray/tests/unit/profile_collector_test.cpp +++ b/compiler-rt/lib/xray/tests/unit/profile_collector_test.cpp @@ -38,8 +38,8 @@ struct ExpectedProfilingFileHeader { void ValidateFileHeaderBlock(XRayBuffer B) { ASSERT_NE(static_cast(B.Data), nullptr); ASSERT_EQ(B.Size, sizeof(ExpectedProfilingFileHeader)); - typename std::aligned_storage::type - FileHeaderStorage; + alignas(ExpectedProfilingFileHeader) + std::byte FileHeaderStorage[sizeof(ExpectedProfilingFileHeader)]; ExpectedProfilingFileHeader ExpectedHeader; std::memcpy(&FileHeaderStorage, B.Data, B.Size); auto &FileHeader = diff --git a/compiler-rt/lib/xray/tests/unit/segmented_array_test.cpp b/compiler-rt/lib/xray/tests/unit/segmented_array_test.cpp index 46aeb88..26c80de 100644 --- a/compiler-rt/lib/xray/tests/unit/segmented_array_test.cpp +++ b/compiler-rt/lib/xray/tests/unit/segmented_array_test.cpp @@ -226,13 +226,11 @@ TEST(SegmentedArrayTest, SimulateStackBehaviour) { TEST(SegmentedArrayTest, PlacementNewOnAlignedStorage) { using AllocatorType = typename Array::AllocatorType; - typename std::aligned_storage::type AllocatorStorage; + alignas(AllocatorType) std::byte AllocatorStorage[sizeof(AllocatorType)]; new (&AllocatorStorage) AllocatorType(1 << 10); auto *A = reinterpret_cast(&AllocatorStorage); - typename std::aligned_storage), - alignof(Array)>::type - ArrayStorage; + alignas(Array) + std::byte ArrayStorage[sizeof(Array)]; new (&ArrayStorage) Array(*A); auto *Data = reinterpret_cast *>(&ArrayStorage); diff --git a/compiler-rt/lib/xray/tests/unit/test_helpers.cpp b/compiler-rt/lib/xray/tests/unit/test_helpers.cpp index 6075f36..81a93d8 100644 --- a/compiler-rt/lib/xray/tests/unit/test_helpers.cpp 
+++ b/compiler-rt/lib/xray/tests/unit/test_helpers.cpp @@ -69,8 +69,7 @@ namespace __xray { std::string serialize(BufferQueue &Buffers, int32_t Version) { std::string Serialized; - std::aligned_storage::type - HeaderStorage; + alignas(XRayFileHeader) std::byte HeaderStorage[sizeof(XRayFileHeader)]; auto *Header = reinterpret_cast(&HeaderStorage); new (Header) XRayFileHeader(); Header->Version = Version; diff --git a/compiler-rt/lib/xray/xray_fdr_logging.cpp b/compiler-rt/lib/xray/xray_fdr_logging.cpp index 378a8c0f..7def356 100644 --- a/compiler-rt/lib/xray/xray_fdr_logging.cpp +++ b/compiler-rt/lib/xray/xray_fdr_logging.cpp @@ -55,17 +55,12 @@ struct XRAY_TLS_ALIGNAS(64) ThreadLocalData { BufferQueue::Buffer Buffer{}; BufferQueue *BQ = nullptr; - using LogWriterStorage = - typename std::aligned_storage::type; - - LogWriterStorage LWStorage; + using LogWriterStorage = std::byte[sizeof(FDRLogWriter)]; + alignas(FDRLogWriter) LogWriterStorage LWStorage; FDRLogWriter *Writer = nullptr; - using ControllerStorage = - typename std::aligned_storage), - alignof(FDRController<>)>::type; - ControllerStorage CStorage; + using ControllerStorage = std::byte[sizeof(FDRController<>)]; + alignas(FDRController<>) ControllerStorage CStorage; FDRController<> *Controller = nullptr; }; @@ -78,7 +73,7 @@ static_assert(std::is_trivially_destructible::value, static pthread_key_t Key; // Global BufferQueue. -static std::aligned_storage::type BufferQueueStorage; +static std::byte BufferQueueStorage[sizeof(BufferQueue)]; static BufferQueue *BQ = nullptr; // Global thresholds for function durations. @@ -129,8 +124,8 @@ static_assert(alignof(ThreadLocalData) >= 64, "ThreadLocalData must be cache line aligned."); #endif static ThreadLocalData &getThreadLocalData() { - thread_local typename std::aligned_storage< - sizeof(ThreadLocalData), alignof(ThreadLocalData)>::type TLDStorage{}; + alignas(ThreadLocalData) thread_local std::byte + TLDStorage[sizeof(ThreadLocalData)]; if (pthread_getspecific(Key) == NULL) { new (reinterpret_cast(&TLDStorage)) ThreadLocalData{}; diff --git a/compiler-rt/lib/xray/xray_function_call_trie.h b/compiler-rt/lib/xray/xray_function_call_trie.h index b8c6058..7536f39 100644 --- a/compiler-rt/lib/xray/xray_function_call_trie.h +++ b/compiler-rt/lib/xray/xray_function_call_trie.h @@ -139,18 +139,14 @@ public: // Use hosted aligned storage members to allow for trivial move and init. // This also allows us to sidestep the potential-failing allocation issue. 
- typename std::aligned_storage::type - NodeAllocatorStorage; - typename std::aligned_storage::type - RootAllocatorStorage; - typename std::aligned_storage::type - ShadowStackAllocatorStorage; - typename std::aligned_storage::type - NodeIdPairAllocatorStorage; + alignas(NodeAllocatorType) std::byte + NodeAllocatorStorage[sizeof(NodeAllocatorType)]; + alignas(RootAllocatorType) std::byte + RootAllocatorStorage[sizeof(RootAllocatorType)]; + alignas(ShadowStackAllocatorType) std::byte + ShadowStackAllocatorStorage[sizeof(ShadowStackAllocatorType)]; + alignas(NodeIdPairAllocatorType) std::byte + NodeIdPairAllocatorStorage[sizeof(NodeIdPairAllocatorType)]; NodeAllocatorType *NodeAllocator = nullptr; RootAllocatorType *RootAllocator = nullptr; diff --git a/compiler-rt/lib/xray/xray_profile_collector.cpp b/compiler-rt/lib/xray/xray_profile_collector.cpp index bef2504..3a28240 100644 --- a/compiler-rt/lib/xray/xray_profile_collector.cpp +++ b/compiler-rt/lib/xray/xray_profile_collector.cpp @@ -29,7 +29,7 @@ namespace { SpinMutex GlobalMutex; struct ThreadTrie { tid_t TId; - typename std::aligned_storage::type TrieStorage; + alignas(FunctionCallTrie) std::byte TrieStorage[sizeof(FunctionCallTrie)]; }; struct ProfileBuffer { @@ -71,16 +71,13 @@ using ThreadDataAllocator = ThreadDataArray::AllocatorType; // by the ThreadData array. This lets us host the buffers, allocators, and tries // associated with a thread by moving the data into the array instead of // attempting to copy the data to a separately backed set of tries. -static typename std::aligned_storage< - sizeof(BufferQueue), alignof(BufferQueue)>::type BufferQueueStorage; +alignas(BufferQueue) static std::byte BufferQueueStorage[sizeof(BufferQueue)]; static BufferQueue *BQ = nullptr; static BufferQueue::Buffer Buffer; -static typename std::aligned_storage::type - ThreadDataAllocatorStorage; -static typename std::aligned_storage::type - ThreadDataArrayStorage; +alignas(ThreadDataAllocator) static std::byte + ThreadDataAllocatorStorage[sizeof(ThreadDataAllocator)]; +alignas(ThreadDataArray) static std::byte + ThreadDataArrayStorage[sizeof(ThreadDataArray)]; static ThreadDataAllocator *TDAllocator = nullptr; static ThreadDataArray *TDArray = nullptr; @@ -91,10 +88,10 @@ using ProfileBufferArrayAllocator = typename ProfileBufferArray::AllocatorType; // These need to be global aligned storage to avoid dynamic initialization. We // need these to be aligned to allow us to placement new objects into the // storage, and have pointers to those objects be appropriately aligned. 
-static typename std::aligned_storage::type - ProfileBuffersStorage; -static typename std::aligned_storage::type - ProfileBufferArrayAllocatorStorage; +alignas(ProfileBufferArray) static std::byte + ProfileBuffersStorage[sizeof(ProfileBufferArray)]; +alignas(ProfileBufferArrayAllocator) static std::byte + ProfileBufferArrayAllocatorStorage[sizeof(ProfileBufferArrayAllocator)]; static ProfileBufferArrayAllocator *ProfileBuffersAllocator = nullptr; static ProfileBufferArray *ProfileBuffers = nullptr; @@ -382,8 +379,8 @@ XRayBuffer nextBuffer(XRayBuffer B) XRAY_NEVER_INSTRUMENT { return {nullptr, 0}; static pthread_once_t Once = PTHREAD_ONCE_INIT; - static typename std::aligned_storage::type - FileHeaderStorage; + alignas(XRayProfilingFileHeader) static std::byte + FileHeaderStorage[sizeof(XRayProfilingFileHeader)]; pthread_once( &Once, +[]() XRAY_NEVER_INSTRUMENT { new (&FileHeaderStorage) XRayProfilingFileHeader{}; diff --git a/compiler-rt/lib/xray/xray_profiling.cpp b/compiler-rt/lib/xray/xray_profiling.cpp index 259ec65..e9ac2fd 100644 --- a/compiler-rt/lib/xray/xray_profiling.cpp +++ b/compiler-rt/lib/xray/xray_profiling.cpp @@ -48,17 +48,14 @@ static pthread_key_t ProfilingKey; // We use a global buffer queue, which gets initialized once at initialisation // time, and gets reset when profiling is "done". -static std::aligned_storage::type - BufferQueueStorage; +alignas(BufferQueue) static std::byte BufferQueueStorage[sizeof(BufferQueue)]; static BufferQueue *BQ = nullptr; thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers; -thread_local std::aligned_storage::type - AllocatorsStorage; -thread_local std::aligned_storage::type - FunctionCallTrieStorage; +alignas(FunctionCallTrie::Allocators) thread_local std::byte + AllocatorsStorage[sizeof(FunctionCallTrie::Allocators)]; +alignas(FunctionCallTrie) thread_local std::byte + FunctionCallTrieStorage[sizeof(FunctionCallTrie)]; thread_local ProfilingData TLD{{0}, {0}}; thread_local atomic_uint8_t ReentranceGuard{0}; diff --git a/compiler-rt/lib/xray/xray_segmented_array.h b/compiler-rt/lib/xray/xray_segmented_array.h index 6eb673e..3ab174b 100644 --- a/compiler-rt/lib/xray/xray_segmented_array.h +++ b/compiler-rt/lib/xray/xray_segmented_array.h @@ -56,8 +56,7 @@ public: // kCacheLineSize-multiple segments, minus the size of two pointers. // // - Request cacheline-multiple sized elements from the allocator. - static constexpr uint64_t AlignedElementStorageSize = - sizeof(typename std::aligned_storage::type); + static constexpr uint64_t AlignedElementStorageSize = sizeof(T); static constexpr uint64_t SegmentControlBlockSize = sizeof(Segment *) * 2; diff --git a/compiler-rt/test/tsan/custom_mutex4.cpp b/compiler-rt/test/tsan/custom_mutex4.cpp index 539a8be..f7dfab0 100644 --- a/compiler-rt/test/tsan/custom_mutex4.cpp +++ b/compiler-rt/test/tsan/custom_mutex4.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx_tsan -O1 --std=c++11 %s -o %t && %run %t 2>&1 | FileCheck %s +// RUN: %clangxx_tsan -O1 --std=c++17 %s -o %t && %run %t 2>&1 | FileCheck %s #include "custom_mutex.h" -#include +#include // Test that the destruction events of a mutex are ignored when the // annotations request this. @@ -12,14 +12,14 @@ // has run. 
 int main() {
-  std::aligned_storage<sizeof(Mutex), alignof(Mutex)>::type mu1_store;
+  alignas(Mutex) std::byte mu1_store[sizeof(Mutex)];
   Mutex* mu1 = reinterpret_cast<Mutex*>(&mu1_store);
   new(&mu1_store) Mutex(false, __tsan_mutex_linker_init);
   mu1->Lock();
   mu1->~Mutex();
   mu1->Unlock();
 
-  std::aligned_storage<sizeof(Mutex), alignof(Mutex)>::type mu2_store;
+  alignas(Mutex) std::byte mu2_store[sizeof(Mutex)];
   Mutex* mu2 = reinterpret_cast<Mutex*>(&mu2_store);
   new(&mu2_store) Mutex(false, 0, __tsan_mutex_not_static);
   mu2->Lock();
diff --git a/compiler-rt/test/tsan/custom_mutex5.cpp b/compiler-rt/test/tsan/custom_mutex5.cpp
index cb18b23..6d65829 100644
--- a/compiler-rt/test/tsan/custom_mutex5.cpp
+++ b/compiler-rt/test/tsan/custom_mutex5.cpp
@@ -1,20 +1,20 @@
-// RUN: %clangxx_tsan -O1 --std=c++11 %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 --std=c++17 %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
 
 #include "custom_mutex.h"
-#include <type_traits>
+#include <cstddef>
 
 // Test that we detect the destruction of an in-use mutex when the
 // thread annotations don't otherwise disable the check.
 
 int main() {
-  std::aligned_storage<sizeof(Mutex), alignof(Mutex)>::type mu1_store;
+  alignas(Mutex) std::byte mu1_store[sizeof(Mutex)];
   Mutex* mu1 = reinterpret_cast<Mutex*>(&mu1_store);
   new(&mu1_store) Mutex(false, 0);
   mu1->Lock();
   mu1->~Mutex();
   mu1->Unlock();
 
-  std::aligned_storage<sizeof(Mutex), alignof(Mutex)>::type mu2_store;
+  alignas(Mutex) std::byte mu2_store[sizeof(Mutex)];
   Mutex* mu2 = reinterpret_cast<Mutex*>(&mu2_store);
   new(&mu2_store) Mutex(false, __tsan_mutex_not_static,
                         __tsan_mutex_not_static);
--
cgit v1.1


From 82f6cde8a98064be8e17cf57fed170b7205bc184 Mon Sep 17 00:00:00 2001
From: NAKAMURA Takumi
Date: Sat, 8 Jun 2024 20:13:38 +0900
Subject: lld/test: Make sure to remove %t first

2e1788f8e265 reverted #94843. It was creating `%t` as a directory, which
causes an error in incremental builds.

---
 lld/test/ELF/aarch64-reloc-pauth.s | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lld/test/ELF/aarch64-reloc-pauth.s b/lld/test/ELF/aarch64-reloc-pauth.s
index b603d8f..0cfcb16 100644
--- a/lld/test/ELF/aarch64-reloc-pauth.s
+++ b/lld/test/ELF/aarch64-reloc-pauth.s
@@ -1,5 +1,6 @@
 # REQUIRES: aarch64
 
+# RUN: rm -rf %t
 # RUN: llvm-mc -filetype=obj -triple=aarch64 %p/Inputs/shared2.s -o %t.a.o
 # RUN: ld.lld -shared %t.a.o -soname=so -o %t.a.so
 # RUN: llvm-mc -filetype=obj -triple=aarch64 %s -o %t.o
--
cgit v1.1


From d4eed43badfcaba044b038b704b57ea130fd5e15 Mon Sep 17 00:00:00 2001
From: Vlad Serebrennikov
Date: Sat, 8 Jun 2024 16:23:17 +0400
Subject: Enable LLDB tests in Linux pre-merge CI (#94208)

This patch removes LLDB from the list of projects that are excluded from
building and testing on pre-merge CI on Linux.

The Windows environment needs to be prepared in order to test LLDB
(https://github.com/llvm/llvm-project/pull/94208#issuecomment-2146256857),
but we don't have enough maintenance resources to do that at the moment.
Because LLDB has been in the list of projects that need to be tested on
Clang changes, this PR makes this happen on Linux. This seems to be the
consensus in the discussion of this PR.
--- .ci/generate-buildkite-pipeline-premerge | 5 ++--- .ci/monolithic-linux.sh | 1 + clang/examples/PrintFunctionNames/PrintFunctionNames.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.ci/generate-buildkite-pipeline-premerge b/.ci/generate-buildkite-pipeline-premerge index 033ab80..fd603de 100755 --- a/.ci/generate-buildkite-pipeline-premerge +++ b/.ci/generate-buildkite-pipeline-premerge @@ -153,7 +153,6 @@ function exclude-linux() { for project in ${projects}; do case ${project} in cross-project-tests) ;; # tests failing - lldb) ;; # tests failing openmp) ;; # https://github.com/google/llvm-premerge-checks/issues/410 *) echo "${project}" @@ -170,7 +169,7 @@ function exclude-windows() { compiler-rt) ;; # tests taking too long openmp) ;; # TODO: having trouble with the Perl installation libc) ;; # no Windows support - lldb) ;; # tests failing + lldb) ;; # custom environment requirements (https://github.com/llvm/llvm-project/pull/94208#issuecomment-2146256857) bolt) ;; # tests are not supported yet *) echo "${project}" @@ -213,7 +212,7 @@ function check-targets() { echo "check-unwind" ;; lldb) - echo "check-all" # TODO: check-lldb may not include all the LLDB tests? + echo "check-lldb" ;; pstl) echo "check-all" diff --git a/.ci/monolithic-linux.sh b/.ci/monolithic-linux.sh index 38d7128..b78dc59 100755 --- a/.ci/monolithic-linux.sh +++ b/.ci/monolithic-linux.sh @@ -39,6 +39,7 @@ targets="${2}" echo "--- cmake" pip install -q -r "${MONOREPO_ROOT}"/mlir/python/requirements.txt +pip install -q -r "${MONOREPO_ROOT}"/lldb/test/requirements.txt cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \ -D LLVM_ENABLE_PROJECTS="${projects}" \ -G Ninja \ diff --git a/clang/examples/PrintFunctionNames/PrintFunctionNames.cpp b/clang/examples/PrintFunctionNames/PrintFunctionNames.cpp index 6509a64..b2b785b 100644 --- a/clang/examples/PrintFunctionNames/PrintFunctionNames.cpp +++ b/clang/examples/PrintFunctionNames/PrintFunctionNames.cpp @@ -72,7 +72,7 @@ public: *sema.LateParsedTemplateMap.find(FD)->second; sema.LateTemplateParser(sema.OpaqueParser, LPT); llvm::errs() << "late-parsed-decl: \"" << FD->getNameAsString() << "\"\n"; - } + } } }; -- cgit v1.1 From 540f68c44f4813c2c16f92ccbba5a15e8ff87c85 Mon Sep 17 00:00:00 2001 From: DaPorkchop_ Date: Sat, 8 Jun 2024 15:32:34 +0200 Subject: [SimplifyCFG] Don't use a mask for lookup tables generated from switches with an unreachable default case (#94468) When transforming a switch with holes into a lookup table, we currently use a mask to check if the current index is handled by the switch or if it is a hole. If it is a hole, we skip loading from the lookup table. Normally, if the switch's default case is unreachable this has no impact, as the mask test gets optimized away by subsequent passes. However, if the switch is large enough that the number of lookup table entries exceeds the target's register width, we won't be able to fit all the cases into a mask and the switch won't get transformed into a lookup table. If we know that the switch's default case is unreachable, we know that the mask is unnecessary and can skip constructing it entirely, which allows us to transform the switch into a lookup table. [Example](https://godbolt.org/z/7x7qfx8M1) In the future, it might be interesting to consider allowing lookup table masks to be more than one register large (e.g. using a constant array of bit flags, similar to `std::bitset`). 
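A hedged source-level sketch of the effect (a hypothetical function, not the
code behind the godbolt link; it assumes the switch also satisfies the pass's
density and profitability heuristics):

```cpp
// The case values span an index range wider than 64, so the holes cannot be
// described by a single 64-bit mask register. With a reachable default this
// switch therefore stayed a switch; with an unreachable default, the holes
// can never be hit and SimplifyCFG may now emit a plain table load.
int decode(unsigned x) {
  switch (x) {
  case 0:   return 10;
  case 2:   return 20;
  case 5:   return 30;
  // ... enough additional sparse cases in [0, 70] to be profitable ...
  case 70:  return 40;
  default:  __builtin_unreachable();
  }
}
```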
--- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 30 +- .../SimplifyCFG/RISCV/switch-of-powers-of-two.ll | 4 - .../SimplifyCFG/X86/switch_to_lookup_table.ll | 45 +- .../SimplifyCFG/X86/switch_to_lookup_table_big.ll | 542 +++++++++++++++++++++ 4 files changed, 607 insertions(+), 14 deletions(-) create mode 100644 llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index fe6ec88..292739b 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -6743,8 +6743,25 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, TableSize = (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1; + // If the default destination is unreachable, or if the lookup table covers + // all values of the conditional variable, branch directly to the lookup table + // BB. Otherwise, check that the condition is within the case range. + bool DefaultIsReachable = !SI->defaultDestUndefined(); + bool TableHasHoles = (NumResults < TableSize); - bool NeedMask = (TableHasHoles && !HasDefaultResults); + + // If the table has holes but the default destination doesn't produce any + // constant results, the lookup table entries corresponding to the holes will + // contain undefined values. + bool AllHolesAreUndefined = TableHasHoles && !HasDefaultResults; + + // If the default destination doesn't produce a constant result but is still + // reachable, and the lookup table has holes, we need to use a mask to + // determine if the current index should load from the lookup table or jump + // to the default case. + // The mask is unnecessary if the table has holes but the default destination + // is unreachable, as in that case the holes must also be unreachable. + bool NeedMask = AllHolesAreUndefined && DefaultIsReachable; if (NeedMask) { // As an extra penalty for the validity test we require more cases. if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark). @@ -6766,12 +6783,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, "It is impossible for a switch to have more entries than the max " "representable value of its input integer type's size."); - // If the default destination is unreachable, or if the lookup table covers - // all values of the conditional variable, branch directly to the lookup table - // BB. Otherwise, check that the condition is within the case range. - bool DefaultIsReachable = - !isa(SI->getDefaultDest()->getFirstNonPHIOrDbg()); - // Create the BB that does the lookups. Module &Mod = *CommonDest->getParent()->getParent(); BasicBlock *LookupBB = BasicBlock::Create( @@ -6895,8 +6906,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, for (PHINode *PHI : PHIs) { const ResultListTy &ResultList = ResultLists[PHI]; - // If using a bitmask, use any value to fill the lookup table holes. - Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI]; + // Use any value to fill the lookup table holes. + Constant *DV = + AllHolesAreUndefined ? 
ResultLists[PHI][0].second : DefaultResults[PHI]; StringRef FuncName = Fn->getName(); SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV, DL, FuncName); diff --git a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll index 3ded78d..2ac94af 100644 --- a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll +++ b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll @@ -34,10 +34,6 @@ define i32 @switch_of_powers(i32 %x) { ; RV64ZBB-LABEL: @switch_of_powers( ; RV64ZBB-NEXT: entry: ; RV64ZBB-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true) -; RV64ZBB-NEXT: [[SWITCH_MASKINDEX:%.*]] = trunc i32 [[TMP0]] to i8 -; RV64ZBB-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i8 121, [[SWITCH_MASKINDEX]] -; RV64ZBB-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1 -; RV64ZBB-NEXT: call void @llvm.assume(i1 [[SWITCH_LOBIT]]) ; RV64ZBB-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers, i32 0, i32 [[TMP0]] ; RV64ZBB-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4 ; RV64ZBB-NEXT: ret i32 [[SWITCH_LOAD]] diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll index d6450a2..845c500 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll @@ -38,6 +38,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK: @switch.table.threecases = private unnamed_addr constant [3 x i32] [i32 10, i32 7, i32 5], align 4 ; CHECK: @switch.table.covered_switch_with_bit_tests = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 1, i32 1], align 4 ; CHECK: @switch.table.signed_overflow1 = private unnamed_addr constant [4 x i32] [i32 3333, i32 4444, i32 1111, i32 2222], align 4 +; CHECK: @switch.table.signed_overflow2 = private unnamed_addr constant [4 x i32] [i32 3333, i32 4444, i32 2222, i32 2222], align 4 ;. 
define i32 @f(i32 %c) { ; CHECK-LABEL: @f( @@ -1738,12 +1739,53 @@ define i32 @signed_overflow2(i8 %n) { ; CHECK-LABEL: @signed_overflow2( ; CHECK-NEXT: start: ; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[N:%.*]] to i2 -; CHECK-NEXT: switch i2 [[TRUNC]], label [[BB1:%.*]] [ +; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i2 [[TRUNC]], -2 +; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i2 [[SWITCH_TABLEIDX]] to i3 +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.signed_overflow2, i32 0, i3 [[SWITCH_TABLEIDX_ZEXT]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4 +; CHECK-NEXT: ret i32 [[SWITCH_LOAD]] +; +start: + %trunc = trunc i8 %n to i2 + switch i2 %trunc, label %bb1 [ + i2 1, label %bb3 + i2 -2, label %bb4 + i2 -1, label %bb5 + ] + +bb1: ; preds = %start + unreachable + +bb3: ; preds = %start + br label %bb6 + +bb4: ; preds = %start + br label %bb6 + +bb5: ; preds = %start + br label %bb6 + +bb6: ; preds = %start, %bb3, %bb4, %bb5 + %.sroa.0.0 = phi i32 [ 4444, %bb5 ], [ 3333, %bb4 ], [ 2222, %bb3 ] + ret i32 %.sroa.0.0 +} + +; This is the same as @signed_overflow2 except that the default case calls @exit(), so it +; isn't treated as unreachable +define i32 @signed_overflow3(i8 %n) { +; CHECK-LABEL: @signed_overflow3( +; CHECK-NEXT: start: +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[N:%.*]] to i2 +; CHECK-NEXT: switch i2 [[TRUNC]], label [[START_UNREACHABLEDEFAULT:%.*]] [ ; CHECK-NEXT: i2 1, label [[BB6:%.*]] ; CHECK-NEXT: i2 -2, label [[BB4:%.*]] ; CHECK-NEXT: i2 -1, label [[BB5:%.*]] +; CHECK-NEXT: i2 0, label [[BB1:%.*]] ; CHECK-NEXT: ] +; CHECK: start.unreachabledefault: +; CHECK-NEXT: unreachable ; CHECK: bb1: +; CHECK-NEXT: call void @exit(i32 1) ; CHECK-NEXT: unreachable ; CHECK: bb4: ; CHECK-NEXT: br label [[BB6]] @@ -1762,6 +1804,7 @@ start: ] bb1: ; preds = %start + call void @exit(i32 1) unreachable bb3: ; preds = %start diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll new file mode 100644 index 0000000..7988e30 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll @@ -0,0 +1,542 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals +; RUN: opt < %s -passes=simplifycfg -switch-to-lookup=true -S | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +; A dense switch with a reachable default case should be optimized into a lookup table with a bounds check +;. 
+; CHECK: @switch.table.reachable_default_dense_0to31 = private unnamed_addr constant [32 x i32] [i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1], align 4 +; CHECK: @switch.table.unreachable_default_dense_0to31 = private unnamed_addr constant [32 x i32] [i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1], align 4 +; CHECK: @switch.table.reachable_default_holes_0to31 = private unnamed_addr constant [32 x i32] [i32 0, i32 7, i32 6, i32 0, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 0, i32 2, i32 1, i32 0, i32 7, i32 0, i32 5, i32 4, i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 5, i32 0, i32 3, i32 2, i32 1], align 4 +; CHECK: @switch.table.unreachable_default_holes_0to31 = private unnamed_addr constant [32 x i32] [i32 0, i32 7, i32 6, i32 0, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 0, i32 2, i32 1, i32 0, i32 7, i32 0, i32 5, i32 4, i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 5, i32 0, i32 3, i32 2, i32 1], align 4 +; CHECK: @switch.table.reachable_default_dense_0to32 = private unnamed_addr constant [33 x i32] [i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0], align 4 +; CHECK: @switch.table.unreachable_default_dense_0to32 = private unnamed_addr constant [33 x i32] [i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0], align 4 +; CHECK: @switch.table.unreachable_default_holes_0to32 = private unnamed_addr constant [33 x i32] [i32 0, i32 7, i32 6, i32 0, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 0, i32 2, i32 1, i32 0, i32 7, i32 0, i32 5, i32 4, i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 5, i32 0, i32 3, i32 2, i32 1, i32 0], align 4 +;. 
+define i32 @reachable_default_dense_0to31(i32 %x, i32 %y) { +; CHECK-LABEL: @reachable_default_dense_0to31( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 32 +; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [32 x i32], ptr @switch.table.reachable_default_dense_0to31, i32 0, i32 [[X]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4 +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ [[Y:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %x, label %sw.default [ + i32 0, label %bb0 + i32 1, label %bb7 + i32 2, label %bb6 + i32 3, label %bb5 + i32 4, label %bb4 + i32 5, label %bb3 + i32 6, label %bb2 + i32 7, label %bb1 + i32 8, label %bb0 + i32 9, label %bb7 + i32 10, label %bb6 + i32 11, label %bb5 + i32 12, label %bb4 + i32 13, label %bb3 + i32 14, label %bb2 + i32 15, label %bb1 + i32 16, label %bb0 + i32 17, label %bb7 + i32 18, label %bb6 + i32 19, label %bb5 + i32 20, label %bb4 + i32 21, label %bb3 + i32 22, label %bb2 + i32 23, label %bb1 + i32 24, label %bb0 + i32 25, label %bb7 + i32 26, label %bb6 + i32 27, label %bb5 + i32 28, label %bb4 + i32 29, label %bb3 + i32 30, label %bb2 + i32 31, label %bb1 + ] + +sw.default: br label %return +bb0: br label %return +bb1: br label %return +bb2: br label %return +bb3: br label %return +bb4: br label %return +bb5: br label %return +bb6: br label %return +bb7: br label %return + +return: + %res = phi i32 [ %y, %sw.default ], [ 0, %bb0 ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ], [ 5, %bb5 ], [ 6, %bb6 ], [ 7, %bb7 ] + ret i32 %res + +} + +; A dense switch with an unreachable default case should be optimized into a lookup table without bounds checks +define i32 @unreachable_default_dense_0to31(i32 %x, i32 %y) { +; CHECK-LABEL: @unreachable_default_dense_0to31( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [32 x i32], ptr @switch.table.unreachable_default_dense_0to31, i32 0, i32 [[X:%.*]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4 +; CHECK-NEXT: ret i32 [[SWITCH_LOAD]] +; +entry: + switch i32 %x, label %sw.default [ + i32 0, label %bb0 + i32 1, label %bb7 + i32 2, label %bb6 + i32 3, label %bb5 + i32 4, label %bb4 + i32 5, label %bb3 + i32 6, label %bb2 + i32 7, label %bb1 + i32 8, label %bb0 + i32 9, label %bb7 + i32 10, label %bb6 + i32 11, label %bb5 + i32 12, label %bb4 + i32 13, label %bb3 + i32 14, label %bb2 + i32 15, label %bb1 + i32 16, label %bb0 + i32 17, label %bb7 + i32 18, label %bb6 + i32 19, label %bb5 + i32 20, label %bb4 + i32 21, label %bb3 + i32 22, label %bb2 + i32 23, label %bb1 + i32 24, label %bb0 + i32 25, label %bb7 + i32 26, label %bb6 + i32 27, label %bb5 + i32 28, label %bb4 + i32 29, label %bb3 + i32 30, label %bb2 + i32 31, label %bb1 + ] + +sw.default: unreachable +bb0: br label %return +bb1: br label %return +bb2: br label %return +bb3: br label %return +bb4: br label %return +bb5: br label %return +bb6: br label %return +bb7: br label %return + +return: + %res = phi i32 [ 0, %bb0 ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ], [ 5, %bb5 ], [ 6, %bb6 ], [ 7, %bb7 ] + ret i32 %res + +} + +; A sparse switch with a reachable default case should be optimized into a lookup table with a bounds check and a mask +define i32 @reachable_default_holes_0to31(i32 %x, 
i32 %y) { +; CHECK-LABEL: @reachable_default_holes_0to31( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 32 +; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_HOLE_CHECK:%.*]], label [[RETURN:%.*]] +; CHECK: switch.hole_check: +; CHECK-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i32 -277094665, [[X]] +; CHECK-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i32 [[SWITCH_SHIFTED]] to i1 +; CHECK-NEXT: br i1 [[SWITCH_LOBIT]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [32 x i32], ptr @switch.table.reachable_default_holes_0to31, i32 0, i32 [[X]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4 +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ [[Y:%.*]], [[SWITCH_HOLE_CHECK]] ], [ [[Y]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %x, label %sw.default [ + i32 0, label %bb0 + i32 1, label %bb7 + i32 2, label %bb6 + i32 4, label %bb4 + i32 5, label %bb3 + i32 6, label %bb2 + i32 7, label %bb1 + i32 9, label %bb7 + i32 10, label %bb6 + i32 11, label %bb5 + i32 12, label %bb4 + i32 14, label %bb2 + i32 15, label %bb1 + i32 16, label %bb0 + i32 17, label %bb7 + i32 19, label %bb5 + i32 20, label %bb4 + i32 21, label %bb3 + i32 22, label %bb2 + i32 24, label %bb0 + i32 25, label %bb7 + i32 26, label %bb6 + i32 27, label %bb5 + i32 29, label %bb3 + i32 30, label %bb2 + i32 31, label %bb1 + ] + +sw.default: br label %return +bb0: br label %return +bb1: br label %return +bb2: br label %return +bb3: br label %return +bb4: br label %return +bb5: br label %return +bb6: br label %return +bb7: br label %return + +return: + %res = phi i32 [ %y, %sw.default ], [ 0, %bb0 ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ], [ 5, %bb5 ], [ 6, %bb6 ], [ 7, %bb7 ] + ret i32 %res + +} + +; A sparse switch with an unreachable default case should be optimized into a lookup table without bounds checks +define i32 @unreachable_default_holes_0to31(i32 %x, i32 %y) { +; CHECK-LABEL: @unreachable_default_holes_0to31( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [32 x i32], ptr @switch.table.unreachable_default_holes_0to31, i32 0, i32 [[X:%.*]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4 +; CHECK-NEXT: ret i32 [[SWITCH_LOAD]] +; +entry: + switch i32 %x, label %sw.default [ + i32 0, label %bb0 + i32 1, label %bb7 + i32 2, label %bb6 + i32 4, label %bb4 + i32 5, label %bb3 + i32 6, label %bb2 + i32 7, label %bb1 + i32 9, label %bb7 + i32 10, label %bb6 + i32 11, label %bb5 + i32 12, label %bb4 + i32 14, label %bb2 + i32 15, label %bb1 + i32 16, label %bb0 + i32 17, label %bb7 + i32 19, label %bb5 + i32 20, label %bb4 + i32 21, label %bb3 + i32 22, label %bb2 + i32 24, label %bb0 + i32 25, label %bb7 + i32 26, label %bb6 + i32 27, label %bb5 + i32 29, label %bb3 + i32 30, label %bb2 + i32 31, label %bb1 + ] + +sw.default: unreachable +bb0: br label %return +bb1: br label %return +bb2: br label %return +bb3: br label %return +bb4: br label %return +bb5: br label %return +bb6: br label %return +bb7: br label %return + +return: + %res = phi i32 [ 0, %bb0 ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ], [ 5, %bb5 ], [ 6, %bb6 ], [ 7, %bb7 ] + ret i32 %res + +} + +; A dense switch with a reachable default case should be optimized into a lookup table with a bounds check +define i32 @reachable_default_dense_0to32(i32 %x, i32 %y) { +; 
CHECK-LABEL: @reachable_default_dense_0to32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 33 +; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [33 x i32], ptr @switch.table.reachable_default_dense_0to32, i32 0, i32 [[X]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4 +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ [[Y:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %x, label %sw.default [ + i32 0, label %bb0 + i32 1, label %bb7 + i32 2, label %bb6 + i32 3, label %bb5 + i32 4, label %bb4 + i32 5, label %bb3 + i32 6, label %bb2 + i32 7, label %bb1 + i32 8, label %bb0 + i32 9, label %bb7 + i32 10, label %bb6 + i32 11, label %bb5 + i32 12, label %bb4 + i32 13, label %bb3 + i32 14, label %bb2 + i32 15, label %bb1 + i32 16, label %bb0 + i32 17, label %bb7 + i32 18, label %bb6 + i32 19, label %bb5 + i32 20, label %bb4 + i32 21, label %bb3 + i32 22, label %bb2 + i32 23, label %bb1 + i32 24, label %bb0 + i32 25, label %bb7 + i32 26, label %bb6 + i32 27, label %bb5 + i32 28, label %bb4 + i32 29, label %bb3 + i32 30, label %bb2 + i32 31, label %bb1 + i32 32, label %bb0 + ] + +sw.default: br label %return +bb0: br label %return +bb1: br label %return +bb2: br label %return +bb3: br label %return +bb4: br label %return +bb5: br label %return +bb6: br label %return +bb7: br label %return + +return: + %res = phi i32 [ %y, %sw.default ], [ 0, %bb0 ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ], [ 5, %bb5 ], [ 6, %bb6 ], [ 7, %bb7 ] + ret i32 %res + +} + +; A dense switch with an unreachable default case should be optimized into a lookup table without bounds checks +define i32 @unreachable_default_dense_0to32(i32 %x, i32 %y) { +; CHECK-LABEL: @unreachable_default_dense_0to32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [33 x i32], ptr @switch.table.unreachable_default_dense_0to32, i32 0, i32 [[X:%.*]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4 +; CHECK-NEXT: ret i32 [[SWITCH_LOAD]] +; +entry: + switch i32 %x, label %sw.default [ + i32 0, label %bb0 + i32 1, label %bb7 + i32 2, label %bb6 + i32 3, label %bb5 + i32 4, label %bb4 + i32 5, label %bb3 + i32 6, label %bb2 + i32 7, label %bb1 + i32 8, label %bb0 + i32 9, label %bb7 + i32 10, label %bb6 + i32 11, label %bb5 + i32 12, label %bb4 + i32 13, label %bb3 + i32 14, label %bb2 + i32 15, label %bb1 + i32 16, label %bb0 + i32 17, label %bb7 + i32 18, label %bb6 + i32 19, label %bb5 + i32 20, label %bb4 + i32 21, label %bb3 + i32 22, label %bb2 + i32 23, label %bb1 + i32 24, label %bb0 + i32 25, label %bb7 + i32 26, label %bb6 + i32 27, label %bb5 + i32 28, label %bb4 + i32 29, label %bb3 + i32 30, label %bb2 + i32 31, label %bb1 + i32 32, label %bb0 + ] + +sw.default: unreachable +bb0: br label %return +bb1: br label %return +bb2: br label %return +bb3: br label %return +bb4: br label %return +bb5: br label %return +bb6: br label %return +bb7: br label %return + +return: + %res = phi i32 [ 0, %bb0 ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ], [ 5, %bb5 ], [ 6, %bb6 ], [ 7, %bb7 ] + ret i32 %res + +} + +; A sparse switch with a reachable default case which would be optimized into a lookup table with a bounds check and a mask, but doesn't because +; it would require a 33-bit mask +define i32 
@reachable_default_holes_0to32(i32 %x, i32 %y) { +; CHECK-LABEL: @reachable_default_holes_0to32( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[X:%.*]], label [[RETURN:%.*]] [ +; CHECK-NEXT: i32 0, label [[BB0:%.*]] +; CHECK-NEXT: i32 1, label [[BB7:%.*]] +; CHECK-NEXT: i32 2, label [[BB6:%.*]] +; CHECK-NEXT: i32 4, label [[BB4:%.*]] +; CHECK-NEXT: i32 5, label [[BB3:%.*]] +; CHECK-NEXT: i32 6, label [[BB2:%.*]] +; CHECK-NEXT: i32 7, label [[BB1:%.*]] +; CHECK-NEXT: i32 9, label [[BB7]] +; CHECK-NEXT: i32 10, label [[BB6]] +; CHECK-NEXT: i32 11, label [[BB5:%.*]] +; CHECK-NEXT: i32 12, label [[BB4]] +; CHECK-NEXT: i32 14, label [[BB2]] +; CHECK-NEXT: i32 15, label [[BB1]] +; CHECK-NEXT: i32 16, label [[BB0]] +; CHECK-NEXT: i32 17, label [[BB7]] +; CHECK-NEXT: i32 19, label [[BB5]] +; CHECK-NEXT: i32 20, label [[BB4]] +; CHECK-NEXT: i32 21, label [[BB3]] +; CHECK-NEXT: i32 22, label [[BB2]] +; CHECK-NEXT: i32 24, label [[BB0]] +; CHECK-NEXT: i32 25, label [[BB7]] +; CHECK-NEXT: i32 26, label [[BB6]] +; CHECK-NEXT: i32 27, label [[BB5]] +; CHECK-NEXT: i32 29, label [[BB3]] +; CHECK-NEXT: i32 30, label [[BB2]] +; CHECK-NEXT: i32 31, label [[BB1]] +; CHECK-NEXT: i32 32, label [[BB0]] +; CHECK-NEXT: ] +; CHECK: bb0: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: bb1: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: bb2: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: bb3: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: bb4: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: bb5: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: bb6: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: bb7: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[BB0]] ], [ 1, [[BB1]] ], [ 2, [[BB2]] ], [ 3, [[BB3]] ], [ 4, [[BB4]] ], [ 5, [[BB5]] ], [ 6, [[BB6]] ], [ 7, [[BB7]] ], [ [[Y:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %x, label %sw.default [ + i32 0, label %bb0 + i32 1, label %bb7 + i32 2, label %bb6 + i32 4, label %bb4 + i32 5, label %bb3 + i32 6, label %bb2 + i32 7, label %bb1 + i32 9, label %bb7 + i32 10, label %bb6 + i32 11, label %bb5 + i32 12, label %bb4 + i32 14, label %bb2 + i32 15, label %bb1 + i32 16, label %bb0 + i32 17, label %bb7 + i32 19, label %bb5 + i32 20, label %bb4 + i32 21, label %bb3 + i32 22, label %bb2 + i32 24, label %bb0 + i32 25, label %bb7 + i32 26, label %bb6 + i32 27, label %bb5 + i32 29, label %bb3 + i32 30, label %bb2 + i32 31, label %bb1 + i32 32, label %bb0 + ] + +sw.default: br label %return +bb0: br label %return +bb1: br label %return +bb2: br label %return +bb3: br label %return +bb4: br label %return +bb5: br label %return +bb6: br label %return +bb7: br label %return + +return: + %res = phi i32 [ %y, %sw.default ], [ 0, %bb0 ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ], [ 5, %bb5 ], [ 6, %bb6 ], [ 7, %bb7 ] + ret i32 %res + +} + +; A sparse switch with an unreachable default case which can be optimized into a lookup table without bounds checks. Because the default case is +; unreachable, the fact that a 33-bit mask would be required doesn't prevent lookup table optimization. 
+define i32 @unreachable_default_holes_0to32(i32 %x, i32 %y) { +; CHECK-LABEL: @unreachable_default_holes_0to32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [33 x i32], ptr @switch.table.unreachable_default_holes_0to32, i32 0, i32 [[X:%.*]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4 +; CHECK-NEXT: ret i32 [[SWITCH_LOAD]] +; +entry: + switch i32 %x, label %sw.default [ + i32 0, label %bb0 + i32 1, label %bb7 + i32 2, label %bb6 + i32 4, label %bb4 + i32 5, label %bb3 + i32 6, label %bb2 + i32 7, label %bb1 + i32 9, label %bb7 + i32 10, label %bb6 + i32 11, label %bb5 + i32 12, label %bb4 + i32 14, label %bb2 + i32 15, label %bb1 + i32 16, label %bb0 + i32 17, label %bb7 + i32 19, label %bb5 + i32 20, label %bb4 + i32 21, label %bb3 + i32 22, label %bb2 + i32 24, label %bb0 + i32 25, label %bb7 + i32 26, label %bb6 + i32 27, label %bb5 + i32 29, label %bb3 + i32 30, label %bb2 + i32 31, label %bb1 + i32 32, label %bb0 + ] + +sw.default: unreachable +bb0: br label %return +bb1: br label %return +bb2: br label %return +bb3: br label %return +bb4: br label %return +bb5: br label %return +bb6: br label %return +bb7: br label %return + +return: + %res = phi i32 [ 0, %bb0 ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ], [ 5, %bb5 ], [ 6, %bb6 ], [ 7, %bb7 ] + ret i32 %res + +} -- cgit v1.1 From 2d21851fd560bd7a8f5d5cce22dc1c4966dc4092 Mon Sep 17 00:00:00 2001 From: FantasqueX Date: Sat, 8 Jun 2024 21:33:43 +0800 Subject: [llvm] Remove useless headers in example BrainF (#93701) --- llvm/examples/BrainF/BrainF.cpp | 1 - llvm/examples/BrainF/BrainFDriver.cpp | 3 --- 2 files changed, 4 deletions(-) diff --git a/llvm/examples/BrainF/BrainF.cpp b/llvm/examples/BrainF/BrainF.cpp index 1c7cacb..ac01961 100644 --- a/llvm/examples/BrainF/BrainF.cpp +++ b/llvm/examples/BrainF/BrainF.cpp @@ -37,7 +37,6 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/Support/Casting.h" #include #include diff --git a/llvm/examples/BrainF/BrainFDriver.cpp b/llvm/examples/BrainF/BrainFDriver.cpp index 6448347..98fa735 100644 --- a/llvm/examples/BrainF/BrainFDriver.cpp +++ b/llvm/examples/BrainF/BrainFDriver.cpp @@ -28,7 +28,6 @@ #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/GenericValue.h" -#include "llvm/ExecutionEngine/MCJIT.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" @@ -38,13 +37,11 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" -#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" -#include #include #include #include -- cgit v1.1 From d9507a3e10d1750d88dd518c14b9a9a62b9eefcd Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sat, 8 Jun 2024 21:40:57 +0800 Subject: [DAGCombine] Fix miscompilation caused by PR94008 (#94850) The pr description in #94008 mismatches with the code. > + When VT is smaller than ShiftVT, it is safe to use trunc. > + When VT is larger than ShiftVT, it is safe to use zext iff `is_zero_poison` is true (i.e., `opcode == ISD::CTTZ_ZERO_UNDEF`). See also the counterexample `src_shl_cttz2 -> tgt_shl_cttz2` in the alive2 proofs. Closes #94824. 
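To make the hazard concrete, here is a minimal IR sketch (an editorial
illustration, not taken from the alive2 proofs; the function and value names
are made up) for VT = i16 wider than ShiftVT = i8:

  declare i8 @llvm.cttz.i8(i8, i1)

  ; Source form: shl by a zero-extended cttz whose zero input is NOT poison.
  ; For %y = 0: cttz.i8(0, false) = 8, so the result is 1 << 8 = 256.
  define i16 @src(i8 %y) {
    %tz  = call i8 @llvm.cttz.i8(i8 %y, i1 false)
    %ext = zext i8 %tz to i16
    %r   = shl i16 1, %ext
    ret i16 %r
  }

  ; Rewritten form (shl X, cttz(Y)) -> (mul (Y & -Y), X), here with X = 1.
  ; For %y = 0: (0 & -0) = 0, so the result is 0, not 256.
  define i16 @tgt(i8 %y) {
    %neg = sub i8 0, %y
    %low = and i8 %y, %neg
    %ext = zext i8 %low to i16
    %r   = mul i16 %ext, 1
    ret i16 %r
  }

The two forms agree for every nonzero %y; the mismatch at zero is only
permitted when cttz(0) is poison, which is why the widening (zext) case is
now restricted to ISD::CTTZ_ZERO_UNDEF.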
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- llvm/test/CodeGen/X86/pr94824.ll | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/pr94824.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 70b3c7d..e3bd4ea 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10112,7 +10112,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the // target. if (((N1.getOpcode() == ISD::CTTZ && - VT.getScalarSizeInBits() >= ShiftVT.getScalarSizeInBits()) || + VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) || N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) && N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) && TLI.isOperationLegalOrCustom(ISD::MUL, VT)) { diff --git a/llvm/test/CodeGen/X86/pr94824.ll b/llvm/test/CodeGen/X86/pr94824.ll new file mode 100644 index 0000000..7744d00 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr94824.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s + +define i16 @pr94824(i8 %x1) { +; CHECK-LABEL: pr94824: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: orl $256, %edi # imm = 0x100 +; CHECK-NEXT: rep bsfl %edi, %ecx +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: shll %cl, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: retq +entry: + %cttz = call i8 @llvm.cttz.i8(i8 %x1, i1 false) + %ext = zext i8 %cttz to i16 + %shl = shl i16 1, %ext + ret i16 %shl +} -- cgit v1.1 From 645fb04a3389e69801d401e669eae9ee42d70217 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sat, 8 Jun 2024 22:28:56 +0800 Subject: [Reassociate] Use uint64_t for repeat count (#94232) This patch relands #91469 and uses `uint64_t` for repeat count to avoid a miscompilation caused by overflow https://github.com/llvm/llvm-project/pull/91469#discussion_r1623925158. --- llvm/lib/Transforms/Scalar/Reassociate.cpp | 120 +++------------------------- llvm/test/Transforms/Reassociate/repeats.ll | 45 +++++++---- 2 files changed, 43 insertions(+), 122 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp index c73d7c8..f36e21b 100644 --- a/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -302,98 +302,7 @@ static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) { return Res; } -/// Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael -/// function. This means that x^(2^k) === 1 mod 2^Bitwidth for -/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic. -/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every -/// even x in Bitwidth-bit arithmetic. -static unsigned CarmichaelShift(unsigned Bitwidth) { - if (Bitwidth < 3) - return Bitwidth - 1; - return Bitwidth - 2; -} - -/// Add the extra weight 'RHS' to the existing weight 'LHS', -/// reducing the combined weight using any special properties of the operation. -/// The existing weight LHS represents the computation X op X op ... op X where -/// X occurs LHS times. The combined weight represents X op X op ... op X with -/// X occurring LHS + RHS times. 
If op is "Xor" for example then the combined -/// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even; -/// the routine returns 1 in LHS in the first case, and 0 in LHS in the second. -static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) { - // If we were working with infinite precision arithmetic then the combined - // weight would be LHS + RHS. But we are using finite precision arithmetic, - // and the APInt sum LHS + RHS may not be correct if it wraps (it is correct - // for nilpotent operations and addition, but not for idempotent operations - // and multiplication), so it is important to correctly reduce the combined - // weight back into range if wrapping would be wrong. - - // If RHS is zero then the weight didn't change. - if (RHS.isMinValue()) - return; - // If LHS is zero then the combined weight is RHS. - if (LHS.isMinValue()) { - LHS = RHS; - return; - } - // From this point on we know that neither LHS nor RHS is zero. - - if (Instruction::isIdempotent(Opcode)) { - // Idempotent means X op X === X, so any non-zero weight is equivalent to a - // weight of 1. Keeping weights at zero or one also means that wrapping is - // not a problem. - assert(LHS == 1 && RHS == 1 && "Weights not reduced!"); - return; // Return a weight of 1. - } - if (Instruction::isNilpotent(Opcode)) { - // Nilpotent means X op X === 0, so reduce weights modulo 2. - assert(LHS == 1 && RHS == 1 && "Weights not reduced!"); - LHS = 0; // 1 + 1 === 0 modulo 2. - return; - } - if (Opcode == Instruction::Add || Opcode == Instruction::FAdd) { - // TODO: Reduce the weight by exploiting nsw/nuw? - LHS += RHS; - return; - } - - assert((Opcode == Instruction::Mul || Opcode == Instruction::FMul) && - "Unknown associative operation!"); - unsigned Bitwidth = LHS.getBitWidth(); - // If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth - // can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth - // bit number x, since either x is odd in which case x^CM = 1, or x is even in - // which case both x^W and x^(W - CM) are zero. By subtracting off multiples - // of CM like this weights can always be reduced to the range [0, CM+Bitwidth) - // which by a happy accident means that they can always be represented using - // Bitwidth bits. - // TODO: Reduce the weight by exploiting nsw/nuw? (Could do much better than - // the Carmichael number). - if (Bitwidth > 3) { - /// CM - The value of Carmichael's lambda function. - APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth)); - // Any weight W >= Threshold can be replaced with W - CM. - APInt Threshold = CM + Bitwidth; - assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!"); - // For Bitwidth 4 or more the following sum does not overflow. - LHS += RHS; - while (LHS.uge(Threshold)) - LHS -= CM; - } else { - // To avoid problems with overflow do everything the same as above but using - // a larger type. - unsigned CM = 1U << CarmichaelShift(Bitwidth); - unsigned Threshold = CM + Bitwidth; - assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold && - "Weights not reduced!"); - unsigned Total = LHS.getZExtValue() + RHS.getZExtValue(); - while (Total >= Threshold) - Total -= CM; - LHS = Total; - } -} - -using RepeatedValue = std::pair; +using RepeatedValue = std::pair; /// Given an associative binary expression, return the leaf /// nodes in Ops along with their weights (how many times the leaf occurs). 
The @@ -475,7 +384,6 @@ static bool LinearizeExprTree(Instruction *I, assert((isa(I) || isa(I)) && "Expected a UnaryOperator or BinaryOperator!"); LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n'); - unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits(); unsigned Opcode = I->getOpcode(); assert(I->isAssociative() && I->isCommutative() && "Expected an associative and commutative operation!"); @@ -490,8 +398,8 @@ static bool LinearizeExprTree(Instruction *I, // with their weights, representing a certain number of paths to the operator. // If an operator occurs in the worklist multiple times then we found multiple // ways to get to it. - SmallVector, 8> Worklist; // (Op, Weight) - Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1))); + SmallVector, 8> Worklist; // (Op, Weight) + Worklist.push_back(std::make_pair(I, 1)); bool Changed = false; // Leaves of the expression are values that either aren't the right kind of @@ -509,7 +417,7 @@ static bool LinearizeExprTree(Instruction *I, // Leaves - Keeps track of the set of putative leaves as well as the number of // paths to each leaf seen so far. - using LeafMap = DenseMap; + using LeafMap = DenseMap; LeafMap Leaves; // Leaf -> Total weight so far. SmallVector LeafOrder; // Ensure deterministic leaf output order. const DataLayout DL = I->getModule()->getDataLayout(); @@ -518,8 +426,8 @@ static bool LinearizeExprTree(Instruction *I, SmallPtrSet Visited; // For checking the iteration scheme. #endif while (!Worklist.empty()) { - std::pair P = Worklist.pop_back_val(); - I = P.first; // We examine the operands of this binary operator. + // We examine the operands of this binary operator. + auto [I, Weight] = Worklist.pop_back_val(); if (isa(I)) { Flags.HasNUW &= I->hasNoUnsignedWrap(); @@ -528,7 +436,6 @@ static bool LinearizeExprTree(Instruction *I, for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands. Value *Op = I->getOperand(OpIdx); - APInt Weight = P.second; // Number of paths to this operand. LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n"); assert(!Op->use_empty() && "No uses, so how did we get to it?!"); @@ -562,7 +469,8 @@ static bool LinearizeExprTree(Instruction *I, "In leaf map but not visited!"); // Update the number of paths to the leaf. - IncorporateWeight(It->second, Weight, Opcode); + It->second += Weight; + assert(It->second >= Weight && "Weight overflows"); // If we still have uses that are not accounted for by the expression // then it is not safe to modify the value. @@ -625,10 +533,7 @@ static bool LinearizeExprTree(Instruction *I, // Node initially thought to be a leaf wasn't. continue; assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!"); - APInt Weight = It->second; - if (Weight.isMinValue()) - // Leaf already output or weight reduction eliminated it. - continue; + uint64_t Weight = It->second; // Ensure the leaf is only output once. 
It->second = 0; Ops.push_back(std::make_pair(V, Weight)); @@ -642,7 +547,7 @@ static bool LinearizeExprTree(Instruction *I, if (Ops.empty()) { Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType()); assert(Identity && "Associative operation without identity!"); - Ops.emplace_back(Identity, APInt(Bitwidth, 1)); + Ops.emplace_back(Identity, 1); } return Changed; @@ -1188,8 +1093,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) { Factors.reserve(Tree.size()); for (unsigned i = 0, e = Tree.size(); i != e; ++i) { RepeatedValue E = Tree[i]; - Factors.append(E.second.getZExtValue(), - ValueEntry(getRank(E.first), E.first)); + Factors.append(E.second, ValueEntry(getRank(E.first), E.first)); } bool FoundFactor = false; @@ -2368,7 +2272,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) { SmallVector Ops; Ops.reserve(Tree.size()); for (const RepeatedValue &E : Tree) - Ops.append(E.second.getZExtValue(), ValueEntry(getRank(E.first), E.first)); + Ops.append(E.second, ValueEntry(getRank(E.first), E.first)); LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n'); diff --git a/llvm/test/Transforms/Reassociate/repeats.ll b/llvm/test/Transforms/Reassociate/repeats.ll index ba25c4b..8600777 100644 --- a/llvm/test/Transforms/Reassociate/repeats.ll +++ b/llvm/test/Transforms/Reassociate/repeats.ll @@ -60,7 +60,8 @@ define i3 @foo3x5(i3 %x) { ; CHECK-SAME: i3 [[X:%.*]]) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i3 [[X]], [[X]] ; CHECK-NEXT: [[TMP4:%.*]] = mul i3 [[TMP3]], [[X]] -; CHECK-NEXT: ret i3 [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = mul i3 [[TMP4]], [[TMP3]] +; CHECK-NEXT: ret i3 [[TMP5]] ; %tmp1 = mul i3 %x, %x %tmp2 = mul i3 %tmp1, %x @@ -74,7 +75,8 @@ define i3 @foo3x5_nsw(i3 %x) { ; CHECK-LABEL: define i3 @foo3x5_nsw( ; CHECK-SAME: i3 [[X:%.*]]) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i3 [[X]], [[X]] -; CHECK-NEXT: [[TMP4:%.*]] = mul nsw i3 [[TMP3]], [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i3 [[TMP3]], [[X]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i3 [[TMP2]], [[TMP3]] ; CHECK-NEXT: ret i3 [[TMP4]] ; %tmp1 = mul i3 %x, %x @@ -89,7 +91,8 @@ define i3 @foo3x6(i3 %x) { ; CHECK-LABEL: define i3 @foo3x6( ; CHECK-SAME: i3 [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = mul i3 [[X]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i3 [[TMP1]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i3 [[TMP1]], [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i3 [[TMP3]], [[TMP3]] ; CHECK-NEXT: ret i3 [[TMP2]] ; %tmp1 = mul i3 %x, %x @@ -106,7 +109,9 @@ define i3 @foo3x7(i3 %x) { ; CHECK-SAME: i3 [[X:%.*]]) { ; CHECK-NEXT: [[TMP5:%.*]] = mul i3 [[X]], [[X]] ; CHECK-NEXT: [[TMP6:%.*]] = mul i3 [[TMP5]], [[X]] -; CHECK-NEXT: ret i3 [[TMP6]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i3 [[TMP6]], [[X]] +; CHECK-NEXT: [[TMP7:%.*]] = mul i3 [[TMP3]], [[TMP6]] +; CHECK-NEXT: ret i3 [[TMP7]] ; %tmp1 = mul i3 %x, %x %tmp2 = mul i3 %tmp1, %x @@ -123,7 +128,8 @@ define i4 @foo4x8(i4 %x) { ; CHECK-SAME: i4 [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = mul i4 [[X]], [[X]] ; CHECK-NEXT: [[TMP4:%.*]] = mul i4 [[TMP1]], [[TMP1]] -; CHECK-NEXT: ret i4 [[TMP4]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i4 [[TMP4]], [[TMP4]] +; CHECK-NEXT: ret i4 [[TMP3]] ; %tmp1 = mul i4 %x, %x %tmp2 = mul i4 %tmp1, %x @@ -140,8 +146,9 @@ define i4 @foo4x9(i4 %x) { ; CHECK-LABEL: define i4 @foo4x9( ; CHECK-SAME: i4 [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = mul i4 [[X]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i4 [[TMP1]], [[X]] -; CHECK-NEXT: [[TMP8:%.*]] = mul i4 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i4 [[TMP1]], [[TMP1]] 
+; CHECK-NEXT: [[TMP3:%.*]] = mul i4 [[TMP2]], [[X]] +; CHECK-NEXT: [[TMP8:%.*]] = mul i4 [[TMP3]], [[TMP2]] ; CHECK-NEXT: ret i4 [[TMP8]] ; %tmp1 = mul i4 %x, %x @@ -160,7 +167,8 @@ define i4 @foo4x10(i4 %x) { ; CHECK-LABEL: define i4 @foo4x10( ; CHECK-SAME: i4 [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = mul i4 [[X]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i4 [[TMP1]], [[X]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i4 [[TMP1]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i4 [[TMP4]], [[X]] ; CHECK-NEXT: [[TMP3:%.*]] = mul i4 [[TMP2]], [[TMP2]] ; CHECK-NEXT: ret i4 [[TMP3]] ; @@ -181,7 +189,8 @@ define i4 @foo4x11(i4 %x) { ; CHECK-LABEL: define i4 @foo4x11( ; CHECK-SAME: i4 [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = mul i4 [[X]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i4 [[TMP1]], [[X]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i4 [[TMP1]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i4 [[TMP4]], [[X]] ; CHECK-NEXT: [[TMP3:%.*]] = mul i4 [[TMP2]], [[X]] ; CHECK-NEXT: [[TMP10:%.*]] = mul i4 [[TMP3]], [[TMP2]] ; CHECK-NEXT: ret i4 [[TMP10]] @@ -204,7 +213,9 @@ define i4 @foo4x12(i4 %x) { ; CHECK-LABEL: define i4 @foo4x12( ; CHECK-SAME: i4 [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = mul i4 [[X]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i4 [[TMP1]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i4 [[TMP1]], [[X]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i4 [[TMP4]], [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i4 [[TMP3]], [[TMP3]] ; CHECK-NEXT: ret i4 [[TMP2]] ; %tmp1 = mul i4 %x, %x @@ -227,7 +238,9 @@ define i4 @foo4x13(i4 %x) { ; CHECK-SAME: i4 [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = mul i4 [[X]], [[X]] ; CHECK-NEXT: [[TMP2:%.*]] = mul i4 [[TMP1]], [[X]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i4 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i4 [[TMP2]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i4 [[TMP3]], [[X]] +; CHECK-NEXT: [[TMP12:%.*]] = mul i4 [[TMP4]], [[TMP3]] ; CHECK-NEXT: ret i4 [[TMP12]] ; %tmp1 = mul i4 %x, %x @@ -252,7 +265,9 @@ define i4 @foo4x14(i4 %x) { ; CHECK-NEXT: [[TMP1:%.*]] = mul i4 [[X]], [[X]] ; CHECK-NEXT: [[TMP6:%.*]] = mul i4 [[TMP1]], [[X]] ; CHECK-NEXT: [[TMP7:%.*]] = mul i4 [[TMP6]], [[TMP6]] -; CHECK-NEXT: ret i4 [[TMP7]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i4 [[TMP7]], [[X]] +; CHECK-NEXT: [[TMP5:%.*]] = mul i4 [[TMP4]], [[TMP4]] +; CHECK-NEXT: ret i4 [[TMP5]] ; %tmp1 = mul i4 %x, %x %tmp2 = mul i4 %tmp1, %x @@ -276,8 +291,10 @@ define i4 @foo4x15(i4 %x) { ; CHECK-SAME: i4 [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = mul i4 [[X]], [[X]] ; CHECK-NEXT: [[TMP6:%.*]] = mul i4 [[TMP1]], [[X]] -; CHECK-NEXT: [[TMP5:%.*]] = mul i4 [[TMP6]], [[X]] -; CHECK-NEXT: [[TMP14:%.*]] = mul i4 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i4 [[TMP6]], [[TMP6]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i4 [[TMP3]], [[X]] +; CHECK-NEXT: [[TMP5:%.*]] = mul i4 [[TMP4]], [[X]] +; CHECK-NEXT: [[TMP14:%.*]] = mul i4 [[TMP5]], [[TMP4]] ; CHECK-NEXT: ret i4 [[TMP14]] ; %tmp1 = mul i4 %x, %x -- cgit v1.1 From bca7864ffe9045e896fe0ed087150af37778eb40 Mon Sep 17 00:00:00 2001 From: AtariDreams Date: Sat, 8 Jun 2024 11:01:05 -0400 Subject: [X86] Support ATOMIC_LOAD_FP_BINOP_MI for other binops (#87524) Since we can bitcast and then do the same thing sub does in the table section above, I figured it was trivial to add fsub, fmul, and fdiv. 
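For reference, the IR shape these new patterns cover is a float/double
operation routed through integer bitcasts around an atomic load/store pair; a
minimal sketch (illustrative only, the function and parameter names are made
up; the committed tests below exercise many more addressing and width
variants):

  define void @sketch(ptr %p, float %v) {
    %i  = load atomic i32, ptr %p monotonic, align 4
    %f  = bitcast i32 %i to float
    %op = fsub float %f, %v
    %r  = bitcast float %op to i32
    store atomic i32 %r, ptr %p monotonic, align 4
    ret void
  }

With SSE this can now select directly to subss plus a movss store instead of
bouncing through integer registers, matching what fadd already got from the
existing ADD entry in the multiclass.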
--- llvm/lib/Target/X86/X86InstrCompiler.td | 4 +- llvm/test/CodeGen/X86/atomic-fp.ll | 2323 +++++++++++++++++++++++++++++++ 2 files changed, 2326 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 270dd32..cf5a32f 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1125,7 +1125,9 @@ multiclass ATOMIC_LOAD_FP_BINOP_MI { Requires<[HasAVX512]>; } defm : ATOMIC_LOAD_FP_BINOP_MI<"ADD", fadd>; -// FIXME: Add fsub, fmul, fdiv, ... +defm : ATOMIC_LOAD_FP_BINOP_MI<"SUB", fsub>; +defm : ATOMIC_LOAD_FP_BINOP_MI<"MUL", fmul>; +defm : ATOMIC_LOAD_FP_BINOP_MI<"DIV", fdiv>; multiclass RELEASE_UNOP { diff --git a/llvm/test/CodeGen/X86/atomic-fp.ll b/llvm/test/CodeGen/X86/atomic-fp.ll index 1094edd..fe79dfe 100644 --- a/llvm/test/CodeGen/X86/atomic-fp.ll +++ b/llvm/test/CodeGen/X86/atomic-fp.ll @@ -777,3 +777,2326 @@ bb: store atomic i64 %tmp9, ptr %tmp4 monotonic, align 8 ret void } + +; ----- FSUB ----- + +define dso_local void @fsub_32r(ptr %loc, float %val) nounwind { +; X86-NOSSE-LABEL: fsub_32r: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl (%eax), %ecx +; X86-NOSSE-NEXT: movl %ecx, (%esp) +; X86-NOSSE-NEXT: flds (%esp) +; X86-NOSSE-NEXT: fsubs {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, (%eax) +; X86-NOSSE-NEXT: addl $8, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fsub_32r: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: subl $8, %esp +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl (%eax), %ecx +; X86-SSE1-NEXT: movl %ecx, (%esp) +; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE1-NEXT: subss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE1-NEXT: movl %ecx, (%eax) +; X86-SSE1-NEXT: addl $8, %esp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fsub_32r: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: subss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE2-NEXT: movss %xmm0, (%eax) +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fsub_32r: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vsubss {{[0-9]+}}(%esp), %xmm0, %xmm0 +; X86-AVX-NEXT: vmovss %xmm0, (%eax) +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fsub_32r: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-SSE-NEXT: subss %xmm0, %xmm1 +; X64-SSE-NEXT: movss %xmm1, (%rdi) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fsub_32r: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 +; X64-AVX-NEXT: vmovss %xmm0, (%rdi) +; X64-AVX-NEXT: retq + %1 = load atomic i32, ptr %loc seq_cst, align 4 + %2 = bitcast i32 %1 to float + %sub = fsub float %2, %val + %3 = bitcast float %sub to i32 + store atomic i32 %3, ptr %loc release, align 4 + ret void +} + +define dso_local void @fsub_64r(ptr %loc, double %val) nounwind { +; X86-NOSSE-LABEL: fsub_64r: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: movl 8(%ebp), %eax +; X86-NOSSE-NEXT: 
fildll (%eax) +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fsubl 12(%ebp) +; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOSSE-NEXT: movl %ecx, (%esp) +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fistpll (%eax) +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fsub_64r: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, %ebp +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $16, %esp +; X86-SSE1-NEXT: movl 8(%ebp), %eax +; X86-SSE1-NEXT: xorps %xmm0, %xmm0 +; X86-SSE1-NEXT: xorps %xmm1, %xmm1 +; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] +; X86-SSE1-NEXT: movss %xmm1, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fldl (%esp) +; X86-SSE1-NEXT: fsubl 12(%ebp) +; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movlps %xmm0, (%eax) +; X86-SSE1-NEXT: movl %ebp, %esp +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fsub_64r: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movl 8(%ebp), %eax +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: subsd 12(%ebp), %xmm0 +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlps %xmm0, (%eax) +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fsub_64r: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $8, %esp +; X86-AVX-NEXT: movl 8(%ebp), %eax +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vsubsd 12(%ebp), %xmm0, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovlps %xmm0, (%eax) +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fsub_64r: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X64-SSE-NEXT: subsd %xmm0, %xmm1 +; X64-SSE-NEXT: movsd %xmm1, (%rdi) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fsub_64r: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; X64-AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 +; X64-AVX-NEXT: vmovsd %xmm0, (%rdi) +; X64-AVX-NEXT: retq + %1 = load atomic i64, ptr %loc seq_cst, align 8 + %2 = bitcast i64 %1 to double + %sub = fsub double %2, %val + %3 = bitcast double %sub to i64 + store atomic i64 %3, ptr %loc release, align 8 + ret void +} + +; Floating-point sub to a global using an immediate. 
+define dso_local void @fsub_32g() nounwind { +; X86-NOSSE-LABEL: fsub_32g: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: movl glob32, %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: fld1 +; X86-NOSSE-NEXT: fchs +; X86-NOSSE-NEXT: fadds (%esp) +; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, glob32 +; X86-NOSSE-NEXT: addl $8, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fsub_32g: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: subl $8, %esp +; X86-SSE1-NEXT: movl glob32, %eax +; X86-SSE1-NEXT: movl %eax, (%esp) +; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE1-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl %eax, glob32 +; X86-SSE1-NEXT: addl $8, %esp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fsub_32g: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X86-SSE2-NEXT: addss glob32, %xmm0 +; X86-SSE2-NEXT: movss %xmm0, glob32 +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fsub_32g: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X86-AVX-NEXT: vaddss glob32, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovss %xmm0, glob32 +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fsub_32g: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X64-SSE-NEXT: addss glob32(%rip), %xmm0 +; X64-SSE-NEXT: movss %xmm0, glob32(%rip) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fsub_32g: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X64-AVX-NEXT: vaddss glob32(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovss %xmm0, glob32(%rip) +; X64-AVX-NEXT: retq + %i = load atomic i32, ptr @glob32 monotonic, align 4 + %f = bitcast i32 %i to float + %sub = fsub float %f, 1.000000e+00 + %s = bitcast float %sub to i32 + store atomic i32 %s, ptr @glob32 monotonic, align 4 + ret void +} + +define dso_local void @fsub_64g() nounwind { +; X86-NOSSE-LABEL: fsub_64g: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: fildll glob64 +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fld1 +; X86-NOSSE-NEXT: fchs +; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fistpll glob64 +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fsub_64g: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, %ebp +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $16, %esp +; X86-SSE1-NEXT: xorps %xmm0, %xmm0 +; X86-SSE1-NEXT: xorps %xmm1, %xmm1 +; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] +; X86-SSE1-NEXT: movss %xmm1, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fld1 +; X86-SSE1-NEXT: fchs +; 
X86-SSE1-NEXT: faddl (%esp) +; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movlps %xmm0, glob64 +; X86-SSE1-NEXT: movl %ebp, %esp +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fsub_64g: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlps %xmm0, glob64 +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fsub_64g: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $8, %esp +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovlps %xmm0, glob64 +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fsub_64g: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0] +; X64-SSE-NEXT: addsd glob64(%rip), %xmm0 +; X64-SSE-NEXT: movsd %xmm0, glob64(%rip) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fsub_64g: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0] +; X64-AVX-NEXT: vaddsd glob64(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovsd %xmm0, glob64(%rip) +; X64-AVX-NEXT: retq + %i = load atomic i64, ptr @glob64 monotonic, align 8 + %f = bitcast i64 %i to double + %sub = fsub double %f, 1.000000e+00 + %s = bitcast double %sub to i64 + store atomic i64 %s, ptr @glob64 monotonic, align 8 + ret void +} + +; Floating-point sub to a hard-coded immediate location using an immediate. 
+define dso_local void @fsub_32imm() nounwind { +; X86-NOSSE-LABEL: fsub_32imm: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: movl -559038737, %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: fld1 +; X86-NOSSE-NEXT: fchs +; X86-NOSSE-NEXT: fadds (%esp) +; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, -559038737 +; X86-NOSSE-NEXT: addl $8, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fsub_32imm: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: subl $8, %esp +; X86-SSE1-NEXT: movl -559038737, %eax +; X86-SSE1-NEXT: movl %eax, (%esp) +; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE1-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl %eax, -559038737 +; X86-SSE1-NEXT: addl $8, %esp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fsub_32imm: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X86-SSE2-NEXT: addss -559038737, %xmm0 +; X86-SSE2-NEXT: movss %xmm0, -559038737 +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fsub_32imm: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X86-AVX-NEXT: vaddss -559038737, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovss %xmm0, -559038737 +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fsub_32imm: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF +; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X64-SSE-NEXT: addss (%rax), %xmm0 +; X64-SSE-NEXT: movss %xmm0, (%rax) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fsub_32imm: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF +; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X64-AVX-NEXT: vaddss (%rax), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovss %xmm0, (%rax) +; X64-AVX-NEXT: retq + %i = load atomic i32, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4 + %f = bitcast i32 %i to float + %sub = fsub float %f, 1.000000e+00 + %s = bitcast float %sub to i32 + store atomic i32 %s, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4 + ret void +} + +define dso_local void @fsub_64imm() nounwind { +; X86-NOSSE-LABEL: fsub_64imm: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: fildll -559038737 +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fld1 +; X86-NOSSE-NEXT: fchs +; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fistpll -559038737 +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fsub_64imm: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, %ebp +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $16, %esp +; X86-SSE1-NEXT: xorps %xmm0, %xmm0 +; X86-SSE1-NEXT: xorps %xmm1, %xmm1 +; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = 
mem[0,1],xmm1[2,3] +; X86-SSE1-NEXT: movss %xmm1, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fld1 +; X86-SSE1-NEXT: fchs +; X86-SSE1-NEXT: faddl (%esp) +; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movlps %xmm0, -559038737 +; X86-SSE1-NEXT: movl %ebp, %esp +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fsub_64imm: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlps %xmm0, -559038737 +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fsub_64imm: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $8, %esp +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovlps %xmm0, -559038737 +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fsub_64imm: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF +; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0] +; X64-SSE-NEXT: addsd (%rax), %xmm0 +; X64-SSE-NEXT: movsd %xmm0, (%rax) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fsub_64imm: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF +; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0] +; X64-AVX-NEXT: vaddsd (%rax), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovsd %xmm0, (%rax) +; X64-AVX-NEXT: retq + %i = load atomic i64, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8 + %f = bitcast i64 %i to double + %sub = fsub double %f, 1.000000e+00 + %s = bitcast double %sub to i64 + store atomic i64 %s, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8 + ret void +} + +; Floating-point sub to a stack location. 
+define dso_local void @fsub_32stack() nounwind { +; X86-NOSSE-LABEL: fsub_32stack: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $12, %esp +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: fld1 +; X86-NOSSE-NEXT: fsubs (%esp) +; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fsub_32stack: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: subl $12, %esp +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl %eax, (%esp) +; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X86-SSE1-NEXT: subss (%esp), %xmm0 +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: addl $12, %esp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fsub_32stack: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X86-SSE2-NEXT: subss (%esp), %xmm0 +; X86-SSE2-NEXT: movss %xmm0, (%esp) +; X86-SSE2-NEXT: popl %eax +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fsub_32stack: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %eax +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X86-AVX-NEXT: vsubss (%esp), %xmm0, %xmm0 +; X86-AVX-NEXT: vmovss %xmm0, (%esp) +; X86-AVX-NEXT: popl %eax +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fsub_32stack: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X64-SSE-NEXT: subss -{{[0-9]+}}(%rsp), %xmm0 +; X64-SSE-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fsub_32stack: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X64-AVX-NEXT: vsubss -{{[0-9]+}}(%rsp), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX-NEXT: retq + %ptr = alloca i32, align 4 + %load = load atomic i32, ptr %ptr acquire, align 4 + %bc0 = bitcast i32 %load to float + %fsub = fsub float 1.000000e+00, %bc0 + %bc1 = bitcast float %fsub to i32 + store atomic i32 %bc1, ptr %ptr release, align 4 + ret void +} + +define dso_local void @fsub_64stack() nounwind { +; X86-NOSSE-LABEL: fsub_64stack: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $40, %esp +; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fld1 +; X86-NOSSE-NEXT: fsubl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fsub_64stack: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, %ebp +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $24, %esp +; X86-SSE1-NEXT: xorps %xmm0, %xmm0 +; X86-SSE1-NEXT: xorps %xmm1, %xmm1 +; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = 
mem[0,1],xmm1[2,3] +; X86-SSE1-NEXT: movss %xmm1, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fld1 +; X86-SSE1-NEXT: fsubl (%esp) +; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl %ebp, %esp +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fsub_64stack: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $16, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0] +; X86-SSE2-NEXT: subsd %xmm0, %xmm1 +; X86-SSE2-NEXT: movsd %xmm1, (%esp) +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fsub_64stack: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $16, %esp +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0] +; X86-AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fsub_64stack: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] +; X64-SSE-NEXT: subsd -{{[0-9]+}}(%rsp), %xmm0 +; X64-SSE-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fsub_64stack: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] +; X64-AVX-NEXT: vsubsd -{{[0-9]+}}(%rsp), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX-NEXT: retq + %ptr = alloca i64, align 8 + %load = load atomic i64, ptr %ptr acquire, align 8 + %bc0 = bitcast i64 %load to double + %fsub = fsub double 1.000000e+00, %bc0 + %bc1 = bitcast double %fsub to i64 + store atomic i64 %bc1, ptr %ptr release, align 8 + ret void +} + +define dso_local void @fsub_array(ptr %arg, double %arg1, i64 %arg2) nounwind { +; X86-NOSSE-LABEL: fsub_array: +; X86-NOSSE: # %bb.0: # %bb +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: pushl %esi +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $40, %esp +; X86-NOSSE-NEXT: movl 20(%ebp), %eax +; X86-NOSSE-NEXT: movl 8(%ebp), %ecx +; X86-NOSSE-NEXT: fildll (%ecx,%eax,8) +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fsubl 12(%ebp) +; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOSSE-NEXT: movl %edx, (%esp) +; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8) +; X86-NOSSE-NEXT: leal -4(%ebp), %esp +; X86-NOSSE-NEXT: popl %esi +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fsub_array: +; X86-SSE1: # %bb.0: # %bb +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, 
%ebp +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $16, %esp +; X86-SSE1-NEXT: movl 20(%ebp), %eax +; X86-SSE1-NEXT: movl 8(%ebp), %ecx +; X86-SSE1-NEXT: xorps %xmm0, %xmm0 +; X86-SSE1-NEXT: xorps %xmm1, %xmm1 +; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] +; X86-SSE1-NEXT: movss %xmm1, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fldl (%esp) +; X86-SSE1-NEXT: fsubl 12(%ebp) +; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movlps %xmm0, (%ecx,%eax,8) +; X86-SSE1-NEXT: movl %ebp, %esp +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fsub_array: +; X86-SSE2: # %bb.0: # %bb +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movl 20(%ebp), %eax +; X86-SSE2-NEXT: movl 8(%ebp), %ecx +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: subsd 12(%ebp), %xmm0 +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlps %xmm0, (%ecx,%eax,8) +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fsub_array: +; X86-AVX: # %bb.0: # %bb +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $8, %esp +; X86-AVX-NEXT: movl 20(%ebp), %eax +; X86-AVX-NEXT: movl 8(%ebp), %ecx +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vsubsd 12(%ebp), %xmm0, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovlps %xmm0, (%ecx,%eax,8) +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fsub_array: +; X64-SSE: # %bb.0: # %bb +; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X64-SSE-NEXT: subsd %xmm0, %xmm1 +; X64-SSE-NEXT: movsd %xmm1, (%rdi,%rsi,8) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fsub_array: +; X64-AVX: # %bb.0: # %bb +; X64-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; X64-AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 +; X64-AVX-NEXT: vmovsd %xmm0, (%rdi,%rsi,8) +; X64-AVX-NEXT: retq +bb: + %tmp4 = getelementptr inbounds i64, ptr %arg, i64 %arg2 + %tmp6 = load atomic i64, ptr %tmp4 monotonic, align 8 + %tmp7 = bitcast i64 %tmp6 to double + %tmp8 = fsub double %tmp7, %arg1 + %tmp9 = bitcast double %tmp8 to i64 + store atomic i64 %tmp9, ptr %tmp4 monotonic, align 8 + ret void +} + +; ----- FMUL ----- + +define dso_local void @fmul_32r(ptr %loc, float %val) nounwind { +; X86-NOSSE-LABEL: fmul_32r: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl (%eax), %ecx +; X86-NOSSE-NEXT: movl %ecx, (%esp) +; X86-NOSSE-NEXT: flds (%esp) +; X86-NOSSE-NEXT: fmuls {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, (%eax) +; X86-NOSSE-NEXT: addl $8, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fmul_32r: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: subl $8, %esp +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl (%eax), %ecx +; X86-SSE1-NEXT: movl %ecx, (%esp) +; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE1-NEXT: mulss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl 
{{[0-9]+}}(%esp), %ecx +; X86-SSE1-NEXT: movl %ecx, (%eax) +; X86-SSE1-NEXT: addl $8, %esp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fmul_32r: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: mulss (%eax), %xmm0 +; X86-SSE2-NEXT: movss %xmm0, (%eax) +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fmul_32r: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vmulss (%eax), %xmm0, %xmm0 +; X86-AVX-NEXT: vmovss %xmm0, (%eax) +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fmul_32r: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: mulss (%rdi), %xmm0 +; X64-SSE-NEXT: movss %xmm0, (%rdi) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fmul_32r: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmulss (%rdi), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovss %xmm0, (%rdi) +; X64-AVX-NEXT: retq + %1 = load atomic i32, ptr %loc seq_cst, align 4 + %2 = bitcast i32 %1 to float + %mul = fmul float %2, %val + %3 = bitcast float %mul to i32 + store atomic i32 %3, ptr %loc release, align 4 + ret void +} + +define dso_local void @fmul_64r(ptr %loc, double %val) nounwind { +; X86-NOSSE-LABEL: fmul_64r: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: movl 8(%ebp), %eax +; X86-NOSSE-NEXT: fildll (%eax) +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fmull 12(%ebp) +; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOSSE-NEXT: movl %ecx, (%esp) +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fistpll (%eax) +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fmul_64r: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, %ebp +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $16, %esp +; X86-SSE1-NEXT: movl 8(%ebp), %eax +; X86-SSE1-NEXT: xorps %xmm0, %xmm0 +; X86-SSE1-NEXT: xorps %xmm1, %xmm1 +; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] +; X86-SSE1-NEXT: movss %xmm1, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fldl (%esp) +; X86-SSE1-NEXT: fmull 12(%ebp) +; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movlps %xmm0, (%eax) +; X86-SSE1-NEXT: movl %ebp, %esp +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fmul_64r: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movl 8(%ebp), %eax +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: mulsd 12(%ebp), %xmm0 +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlps %xmm0, (%eax) +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fmul_64r: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; 
X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $8, %esp +; X86-AVX-NEXT: movl 8(%ebp), %eax +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmulsd 12(%ebp), %xmm0, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovlps %xmm0, (%eax) +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fmul_64r: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: mulsd (%rdi), %xmm0 +; X64-SSE-NEXT: movsd %xmm0, (%rdi) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fmul_64r: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovsd %xmm0, (%rdi) +; X64-AVX-NEXT: retq + %1 = load atomic i64, ptr %loc seq_cst, align 8 + %2 = bitcast i64 %1 to double + %mul = fmul double %2, %val + %3 = bitcast double %mul to i64 + store atomic i64 %3, ptr %loc release, align 8 + ret void +} + +; Floating-point mul to a global using an immediate. +define dso_local void @fmul_32g() nounwind { +; X86-NOSSE-LABEL: fmul_32g: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: movl glob32, %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: flds (%esp) +; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} +; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, glob32 +; X86-NOSSE-NEXT: addl $8, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fmul_32g: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: subl $8, %esp +; X86-SSE1-NEXT: movl glob32, %eax +; X86-SSE1-NEXT: movl %eax, (%esp) +; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE1-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl %eax, glob32 +; X86-SSE1-NEXT: addl $8, %esp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fmul_32g: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] +; X86-SSE2-NEXT: mulss glob32, %xmm0 +; X86-SSE2-NEXT: movss %xmm0, glob32 +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fmul_32g: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] +; X86-AVX-NEXT: vmulss glob32, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovss %xmm0, glob32 +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fmul_32g: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] +; X64-SSE-NEXT: mulss glob32(%rip), %xmm0 +; X64-SSE-NEXT: movss %xmm0, glob32(%rip) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fmul_32g: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] +; X64-AVX-NEXT: vmulss glob32(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovss %xmm0, glob32(%rip) +; X64-AVX-NEXT: retq + %i = load atomic i32, ptr @glob32 monotonic, align 4 + %f = bitcast i32 %i to float + %mul = fmul float %f, 0x400921FA00000000 + %s = bitcast float %mul to i32 + store atomic i32 %s, ptr @glob32 monotonic, align 4 + ret void +} + +define dso_local void @fmul_64g() nounwind { +; X86-NOSSE-LABEL: fmul_64g: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: fildll glob64 +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, 
{{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} +; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fistpll glob64 +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fmul_64g: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, %ebp +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $16, %esp +; X86-SSE1-NEXT: xorps %xmm0, %xmm0 +; X86-SSE1-NEXT: xorps %xmm1, %xmm1 +; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] +; X86-SSE1-NEXT: movss %xmm1, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fldl (%esp) +; X86-SSE1-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} +; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movlps %xmm0, glob64 +; X86-SSE1-NEXT: movl %ebp, %esp +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fmul_64g: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlps %xmm0, glob64 +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fmul_64g: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $8, %esp +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovlps %xmm0, glob64 +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fmul_64g: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [3.1415901184082031E+0,0.0E+0] +; X64-SSE-NEXT: mulsd glob64(%rip), %xmm0 +; X64-SSE-NEXT: movsd %xmm0, glob64(%rip) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fmul_64g: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [3.1415901184082031E+0,0.0E+0] +; X64-AVX-NEXT: vmulsd glob64(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovsd %xmm0, glob64(%rip) +; X64-AVX-NEXT: retq + %i = load atomic i64, ptr @glob64 monotonic, align 8 + %f = bitcast i64 %i to double + %mul = fmul double %f, 0x400921FA00000000 + %s = bitcast double %mul to i64 + store atomic i64 %s, ptr @glob64 monotonic, align 8 + ret void +} + +; Floating-point mul to a hard-coded immediate location using an immediate. 
+define dso_local void @fmul_32imm() nounwind { +; X86-NOSSE-LABEL: fmul_32imm: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: movl -559038737, %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: flds (%esp) +; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} +; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, -559038737 +; X86-NOSSE-NEXT: addl $8, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fmul_32imm: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: subl $8, %esp +; X86-SSE1-NEXT: movl -559038737, %eax +; X86-SSE1-NEXT: movl %eax, (%esp) +; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE1-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl %eax, -559038737 +; X86-SSE1-NEXT: addl $8, %esp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fmul_32imm: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] +; X86-SSE2-NEXT: mulss -559038737, %xmm0 +; X86-SSE2-NEXT: movss %xmm0, -559038737 +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fmul_32imm: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] +; X86-AVX-NEXT: vmulss -559038737, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovss %xmm0, -559038737 +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fmul_32imm: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF +; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] +; X64-SSE-NEXT: mulss (%rax), %xmm0 +; X64-SSE-NEXT: movss %xmm0, (%rax) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fmul_32imm: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF +; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] +; X64-AVX-NEXT: vmulss (%rax), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovss %xmm0, (%rax) +; X64-AVX-NEXT: retq + %i = load atomic i32, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4 + %f = bitcast i32 %i to float + %mul = fmul float %f, 0x400921FA00000000 + %s = bitcast float %mul to i32 + store atomic i32 %s, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4 + ret void +} + +define dso_local void @fmul_64imm() nounwind { +; X86-NOSSE-LABEL: fmul_64imm: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: fildll -559038737 +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} +; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fistpll -559038737 +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fmul_64imm: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, %ebp +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $16, %esp +; X86-SSE1-NEXT: xorps %xmm0, %xmm0 +; X86-SSE1-NEXT: xorps %xmm1, %xmm1 +; X86-SSE1-NEXT: 
movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] +; X86-SSE1-NEXT: movss %xmm1, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fldl (%esp) +; X86-SSE1-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} +; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movlps %xmm0, -559038737 +; X86-SSE1-NEXT: movl %ebp, %esp +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fmul_64imm: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlps %xmm0, -559038737 +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fmul_64imm: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $8, %esp +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovlps %xmm0, -559038737 +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fmul_64imm: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF +; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [3.1415901184082031E+0,0.0E+0] +; X64-SSE-NEXT: mulsd (%rax), %xmm0 +; X64-SSE-NEXT: movsd %xmm0, (%rax) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fmul_64imm: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF +; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [3.1415901184082031E+0,0.0E+0] +; X64-AVX-NEXT: vmulsd (%rax), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovsd %xmm0, (%rax) +; X64-AVX-NEXT: retq + %i = load atomic i64, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8 + %f = bitcast i64 %i to double + %mul = fmul double %f, 0x400921FA00000000 + %s = bitcast double %mul to i64 + store atomic i64 %s, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8 + ret void +} + +; Floating-point mul to a stack location. 
+define dso_local void @fmul_32stack() nounwind { +; X86-NOSSE-LABEL: fmul_32stack: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $12, %esp +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: flds (%esp) +; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} +; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fmul_32stack: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: subl $12, %esp +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl %eax, (%esp) +; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE1-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: addl $12, %esp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fmul_32stack: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] +; X86-SSE2-NEXT: mulss (%esp), %xmm0 +; X86-SSE2-NEXT: movss %xmm0, (%esp) +; X86-SSE2-NEXT: popl %eax +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fmul_32stack: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %eax +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] +; X86-AVX-NEXT: vmulss (%esp), %xmm0, %xmm0 +; X86-AVX-NEXT: vmovss %xmm0, (%esp) +; X86-AVX-NEXT: popl %eax +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fmul_32stack: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] +; X64-SSE-NEXT: mulss -{{[0-9]+}}(%rsp), %xmm0 +; X64-SSE-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fmul_32stack: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] +; X64-AVX-NEXT: vmulss -{{[0-9]+}}(%rsp), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX-NEXT: retq + %ptr = alloca i32, align 4 + %load = load atomic i32, ptr %ptr acquire, align 4 + %bc0 = bitcast i32 %load to float + %fmul = fmul float 0x400921FA00000000, %bc0 + %bc1 = bitcast float %fmul to i32 + store atomic i32 %bc1, ptr %ptr release, align 4 + ret void +} + +define dso_local void @fmul_64stack() nounwind { +; X86-NOSSE-LABEL: fmul_64stack: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $40, %esp +; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} +; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fmul_64stack: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, %ebp +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $24, %esp +; X86-SSE1-NEXT: xorps 
%xmm0, %xmm0 +; X86-SSE1-NEXT: xorps %xmm1, %xmm1 +; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] +; X86-SSE1-NEXT: movss %xmm1, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fldl (%esp) +; X86-SSE1-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} +; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl %ebp, %esp +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fmul_64stack: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $16, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fmul_64stack: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $16, %esp +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fmul_64stack: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [3.1415901184082031E+0,0.0E+0] +; X64-SSE-NEXT: mulsd -{{[0-9]+}}(%rsp), %xmm0 +; X64-SSE-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fmul_64stack: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [3.1415901184082031E+0,0.0E+0] +; X64-AVX-NEXT: vmulsd -{{[0-9]+}}(%rsp), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX-NEXT: retq + %ptr = alloca i64, align 8 + %load = load atomic i64, ptr %ptr acquire, align 8 + %bc0 = bitcast i64 %load to double + %fmul = fmul double 0x400921FA00000000, %bc0 + %bc1 = bitcast double %fmul to i64 + store atomic i64 %bc1, ptr %ptr release, align 8 + ret void +} + +define dso_local void @fmul_array(ptr %arg, double %arg1, i64 %arg2) nounwind { +; X86-NOSSE-LABEL: fmul_array: +; X86-NOSSE: # %bb.0: # %bb +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: pushl %esi +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $40, %esp +; X86-NOSSE-NEXT: movl 20(%ebp), %eax +; X86-NOSSE-NEXT: movl 8(%ebp), %ecx +; X86-NOSSE-NEXT: fildll (%ecx,%eax,8) +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fmull 12(%ebp) +; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOSSE-NEXT: movl %edx, (%esp) +; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8) +; X86-NOSSE-NEXT: leal -4(%ebp), %esp +; X86-NOSSE-NEXT: popl %esi +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fmul_array: +; 
X86-SSE1: # %bb.0: # %bb +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, %ebp +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $16, %esp +; X86-SSE1-NEXT: movl 20(%ebp), %eax +; X86-SSE1-NEXT: movl 8(%ebp), %ecx +; X86-SSE1-NEXT: xorps %xmm0, %xmm0 +; X86-SSE1-NEXT: xorps %xmm1, %xmm1 +; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] +; X86-SSE1-NEXT: movss %xmm1, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fldl (%esp) +; X86-SSE1-NEXT: fmull 12(%ebp) +; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movlps %xmm0, (%ecx,%eax,8) +; X86-SSE1-NEXT: movl %ebp, %esp +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fmul_array: +; X86-SSE2: # %bb.0: # %bb +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movl 20(%ebp), %eax +; X86-SSE2-NEXT: movl 8(%ebp), %ecx +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: mulsd 12(%ebp), %xmm0 +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlps %xmm0, (%ecx,%eax,8) +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fmul_array: +; X86-AVX: # %bb.0: # %bb +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $8, %esp +; X86-AVX-NEXT: movl 20(%ebp), %eax +; X86-AVX-NEXT: movl 8(%ebp), %ecx +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmulsd 12(%ebp), %xmm0, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovlps %xmm0, (%ecx,%eax,8) +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fmul_array: +; X64-SSE: # %bb.0: # %bb +; X64-SSE-NEXT: mulsd (%rdi,%rsi,8), %xmm0 +; X64-SSE-NEXT: movsd %xmm0, (%rdi,%rsi,8) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fmul_array: +; X64-AVX: # %bb.0: # %bb +; X64-AVX-NEXT: vmulsd (%rdi,%rsi,8), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovsd %xmm0, (%rdi,%rsi,8) +; X64-AVX-NEXT: retq +bb: + %tmp4 = getelementptr inbounds i64, ptr %arg, i64 %arg2 + %tmp6 = load atomic i64, ptr %tmp4 monotonic, align 8 + %tmp7 = bitcast i64 %tmp6 to double + %tmp8 = fmul double %tmp7, %arg1 + %tmp9 = bitcast double %tmp8 to i64 + store atomic i64 %tmp9, ptr %tmp4 monotonic, align 8 + ret void +} + +; ----- FDIV ----- + +define dso_local void @fdiv_32r(ptr %loc, float %val) nounwind { +; X86-NOSSE-LABEL: fdiv_32r: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl (%eax), %ecx +; X86-NOSSE-NEXT: movl %ecx, (%esp) +; X86-NOSSE-NEXT: flds (%esp) +; X86-NOSSE-NEXT: fdivs {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, (%eax) +; X86-NOSSE-NEXT: addl $8, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fdiv_32r: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: subl $8, %esp +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl (%eax), %ecx +; X86-SSE1-NEXT: movl %ecx, (%esp) +; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE1-NEXT: divss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), 
%ecx +; X86-SSE1-NEXT: movl %ecx, (%eax) +; X86-SSE1-NEXT: addl $8, %esp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fdiv_32r: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: divss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE2-NEXT: movss %xmm0, (%eax) +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fdiv_32r: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vdivss {{[0-9]+}}(%esp), %xmm0, %xmm0 +; X86-AVX-NEXT: vmovss %xmm0, (%eax) +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fdiv_32r: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-SSE-NEXT: divss %xmm0, %xmm1 +; X64-SSE-NEXT: movss %xmm1, (%rdi) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fdiv_32r: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0 +; X64-AVX-NEXT: vmovss %xmm0, (%rdi) +; X64-AVX-NEXT: retq + %1 = load atomic i32, ptr %loc seq_cst, align 4 + %2 = bitcast i32 %1 to float + %div = fdiv float %2, %val + %3 = bitcast float %div to i32 + store atomic i32 %3, ptr %loc release, align 4 + ret void +} + +define dso_local void @fdiv_64r(ptr %loc, double %val) nounwind { +; X86-NOSSE-LABEL: fdiv_64r: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: movl 8(%ebp), %eax +; X86-NOSSE-NEXT: fildll (%eax) +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fdivl 12(%ebp) +; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOSSE-NEXT: movl %ecx, (%esp) +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fistpll (%eax) +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fdiv_64r: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, %ebp +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $16, %esp +; X86-SSE1-NEXT: movl 8(%ebp), %eax +; X86-SSE1-NEXT: xorps %xmm0, %xmm0 +; X86-SSE1-NEXT: xorps %xmm1, %xmm1 +; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] +; X86-SSE1-NEXT: movss %xmm1, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fldl (%esp) +; X86-SSE1-NEXT: fdivl 12(%ebp) +; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movlps %xmm0, (%eax) +; X86-SSE1-NEXT: movl %ebp, %esp +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fdiv_64r: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movl 8(%ebp), %eax +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: divsd 12(%ebp), %xmm0 +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlps %xmm0, (%eax) +; X86-SSE2-NEXT: movl %ebp, %esp +; 
X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fdiv_64r: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $8, %esp +; X86-AVX-NEXT: movl 8(%ebp), %eax +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vdivsd 12(%ebp), %xmm0, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovlps %xmm0, (%eax) +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fdiv_64r: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X64-SSE-NEXT: divsd %xmm0, %xmm1 +; X64-SSE-NEXT: movsd %xmm1, (%rdi) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fdiv_64r: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; X64-AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0 +; X64-AVX-NEXT: vmovsd %xmm0, (%rdi) +; X64-AVX-NEXT: retq + %1 = load atomic i64, ptr %loc seq_cst, align 8 + %2 = bitcast i64 %1 to double + %div = fdiv double %2, %val + %3 = bitcast double %div to i64 + store atomic i64 %3, ptr %loc release, align 8 + ret void +} + +; Floating-point div to a global using an immediate. +define dso_local void @fdiv_32g() nounwind { +; X86-NOSSE-LABEL: fdiv_32g: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: movl glob32, %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: flds (%esp) +; X86-NOSSE-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}} +; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, glob32 +; X86-NOSSE-NEXT: addl $8, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fdiv_32g: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: subl $8, %esp +; X86-SSE1-NEXT: movl glob32, %eax +; X86-SSE1-NEXT: movl %eax, (%esp) +; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE1-NEXT: divss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl %eax, glob32 +; X86-SSE1-NEXT: addl $8, %esp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fdiv_32g: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: divss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: movss %xmm0, glob32 +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fdiv_32g: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vdivss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovss %xmm0, glob32 +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fdiv_32g: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-SSE-NEXT: divss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE-NEXT: movss %xmm0, glob32(%rip) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fdiv_32g: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-AVX-NEXT: vdivss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovss %xmm0, glob32(%rip) +; X64-AVX-NEXT: retq + %i = load atomic i32, ptr @glob32 monotonic, align 4 + %f = bitcast i32 %i to float + %div = fdiv float %f, 0x400921FA00000000 + %s = bitcast float %div to i32 + store atomic i32 %s, ptr @glob32 monotonic, align 4 + ret void +} + +define dso_local void @fdiv_64g() nounwind { +; X86-NOSSE-LABEL: fdiv_64g: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: andl $-8, 
%esp +; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: fildll glob64 +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}} +; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fistpll glob64 +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fdiv_64g: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, %ebp +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $16, %esp +; X86-SSE1-NEXT: xorps %xmm0, %xmm0 +; X86-SSE1-NEXT: xorps %xmm1, %xmm1 +; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] +; X86-SSE1-NEXT: movss %xmm1, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fldl (%esp) +; X86-SSE1-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}} +; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movlps %xmm0, glob64 +; X86-SSE1-NEXT: movl %ebp, %esp +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fdiv_64g: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: divsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlps %xmm0, glob64 +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fdiv_64g: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $8, %esp +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovlps %xmm0, glob64 +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fdiv_64g: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-SSE-NEXT: divsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE-NEXT: movsd %xmm0, glob64(%rip) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fdiv_64g: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X64-AVX-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovsd %xmm0, glob64(%rip) +; X64-AVX-NEXT: retq + %i = load atomic i64, ptr @glob64 monotonic, align 8 + %f = bitcast i64 %i to double + %div = fdiv double %f, 0x400921FA00000000 + %s = bitcast double %div to i64 + store atomic i64 %s, ptr @glob64 monotonic, align 8 + ret void +} + +; Floating-point div to a hard-coded immediate location using an immediate. 
+define dso_local void @fdiv_32imm() nounwind { +; X86-NOSSE-LABEL: fdiv_32imm: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: movl -559038737, %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: flds (%esp) +; X86-NOSSE-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}} +; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, -559038737 +; X86-NOSSE-NEXT: addl $8, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fdiv_32imm: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: subl $8, %esp +; X86-SSE1-NEXT: movl -559038737, %eax +; X86-SSE1-NEXT: movl %eax, (%esp) +; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE1-NEXT: divss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl %eax, -559038737 +; X86-SSE1-NEXT: addl $8, %esp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fdiv_32imm: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: divss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: movss %xmm0, -559038737 +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fdiv_32imm: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vdivss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovss %xmm0, -559038737 +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fdiv_32imm: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF +; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-SSE-NEXT: divss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE-NEXT: movss %xmm0, (%rax) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fdiv_32imm: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF +; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-AVX-NEXT: vdivss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovss %xmm0, (%rax) +; X64-AVX-NEXT: retq + %i = load atomic i32, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4 + %f = bitcast i32 %i to float + %div = fdiv float %f, 0x400921FA00000000 + %s = bitcast float %div to i32 + store atomic i32 %s, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4 + ret void +} + +define dso_local void @fdiv_64imm() nounwind { +; X86-NOSSE-LABEL: fdiv_64imm: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: fildll -559038737 +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}} +; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fistpll -559038737 +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fdiv_64imm: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, %ebp +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $16, %esp +; X86-SSE1-NEXT: xorps %xmm0, %xmm0 +; X86-SSE1-NEXT: xorps %xmm1, %xmm1 +; 
X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] +; X86-SSE1-NEXT: movss %xmm1, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fldl (%esp) +; X86-SSE1-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}} +; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movlps %xmm0, -559038737 +; X86-SSE1-NEXT: movl %ebp, %esp +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fdiv_64imm: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: divsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlps %xmm0, -559038737 +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fdiv_64imm: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $8, %esp +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovlps %xmm0, -559038737 +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fdiv_64imm: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF +; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-SSE-NEXT: divsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE-NEXT: movsd %xmm0, (%rax) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fdiv_64imm: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF +; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X64-AVX-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovsd %xmm0, (%rax) +; X64-AVX-NEXT: retq + %i = load atomic i64, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8 + %f = bitcast i64 %i to double + %div = fdiv double %f, 0x400921FA00000000 + %s = bitcast double %div to i64 + store atomic i64 %s, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8 + ret void +} + +; Floating-point div to a stack location. 
+define dso_local void @fdiv_32stack() nounwind { +; X86-NOSSE-LABEL: fdiv_32stack: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $12, %esp +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: fld1 +; X86-NOSSE-NEXT: fdivs (%esp) +; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fdiv_32stack: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: subl $12, %esp +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl %eax, (%esp) +; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X86-SSE1-NEXT: divss (%esp), %xmm0 +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: addl $12, %esp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fdiv_32stack: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X86-SSE2-NEXT: divss (%esp), %xmm0 +; X86-SSE2-NEXT: movss %xmm0, (%esp) +; X86-SSE2-NEXT: popl %eax +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fdiv_32stack: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %eax +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X86-AVX-NEXT: vdivss (%esp), %xmm0, %xmm0 +; X86-AVX-NEXT: vmovss %xmm0, (%esp) +; X86-AVX-NEXT: popl %eax +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fdiv_32stack: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X64-SSE-NEXT: divss -{{[0-9]+}}(%rsp), %xmm0 +; X64-SSE-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fdiv_32stack: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X64-AVX-NEXT: vdivss -{{[0-9]+}}(%rsp), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX-NEXT: retq + %ptr = alloca i32, align 4 + %load = load atomic i32, ptr %ptr acquire, align 4 + %bc0 = bitcast i32 %load to float + %fdiv = fdiv float 1.000000e+00, %bc0 + %bc1 = bitcast float %fdiv to i32 + store atomic i32 %bc1, ptr %ptr release, align 4 + ret void +} + +define dso_local void @fdiv_64stack() nounwind { +; X86-NOSSE-LABEL: fdiv_64stack: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $40, %esp +; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fld1 +; X86-NOSSE-NEXT: fdivl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fdiv_64stack: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, %ebp +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $24, %esp +; X86-SSE1-NEXT: xorps %xmm0, %xmm0 +; X86-SSE1-NEXT: xorps %xmm1, %xmm1 +; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = 
mem[0,1],xmm1[2,3] +; X86-SSE1-NEXT: movss %xmm1, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fld1 +; X86-SSE1-NEXT: fdivl (%esp) +; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movl %ebp, %esp +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fdiv_64stack: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $16, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0] +; X86-SSE2-NEXT: divsd %xmm0, %xmm1 +; X86-SSE2-NEXT: movsd %xmm1, (%esp) +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fdiv_64stack: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $16, %esp +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0] +; X86-AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fdiv_64stack: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] +; X64-SSE-NEXT: divsd -{{[0-9]+}}(%rsp), %xmm0 +; X64-SSE-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fdiv_64stack: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] +; X64-AVX-NEXT: vdivsd -{{[0-9]+}}(%rsp), %xmm0, %xmm0 +; X64-AVX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX-NEXT: retq + %ptr = alloca i64, align 8 + %load = load atomic i64, ptr %ptr acquire, align 8 + %bc0 = bitcast i64 %load to double + %fdiv = fdiv double 1.000000e+00, %bc0 + %bc1 = bitcast double %fdiv to i64 + store atomic i64 %bc1, ptr %ptr release, align 8 + ret void +} + +define dso_local void @fdiv_array(ptr %arg, double %arg1, i64 %arg2) nounwind { +; X86-NOSSE-LABEL: fdiv_array: +; X86-NOSSE: # %bb.0: # %bb +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: pushl %esi +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $40, %esp +; X86-NOSSE-NEXT: movl 20(%ebp), %eax +; X86-NOSSE-NEXT: movl 8(%ebp), %ecx +; X86-NOSSE-NEXT: fildll (%ecx,%eax,8) +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fdivl 12(%ebp) +; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOSSE-NEXT: movl %edx, (%esp) +; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8) +; X86-NOSSE-NEXT: leal -4(%ebp), %esp +; X86-NOSSE-NEXT: popl %esi +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: fdiv_array: +; X86-SSE1: # %bb.0: # %bb +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: movl %esp, 
%ebp +; X86-SSE1-NEXT: andl $-8, %esp +; X86-SSE1-NEXT: subl $16, %esp +; X86-SSE1-NEXT: movl 20(%ebp), %eax +; X86-SSE1-NEXT: movl 8(%ebp), %ecx +; X86-SSE1-NEXT: xorps %xmm0, %xmm0 +; X86-SSE1-NEXT: xorps %xmm1, %xmm1 +; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] +; X86-SSE1-NEXT: movss %xmm1, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: fldl (%esp) +; X86-SSE1-NEXT: fdivl 12(%ebp) +; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movlps %xmm0, (%ecx,%eax,8) +; X86-SSE1-NEXT: movl %ebp, %esp +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: fdiv_array: +; X86-SSE2: # %bb.0: # %bb +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movl 20(%ebp), %eax +; X86-SSE2-NEXT: movl 8(%ebp), %ecx +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: divsd 12(%ebp), %xmm0 +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlps %xmm0, (%ecx,%eax,8) +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: fdiv_array: +; X86-AVX: # %bb.0: # %bb +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $8, %esp +; X86-AVX-NEXT: movl 20(%ebp), %eax +; X86-AVX-NEXT: movl 8(%ebp), %ecx +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vdivsd 12(%ebp), %xmm0, %xmm0 +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovlps %xmm0, (%ecx,%eax,8) +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: fdiv_array: +; X64-SSE: # %bb.0: # %bb +; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X64-SSE-NEXT: divsd %xmm0, %xmm1 +; X64-SSE-NEXT: movsd %xmm1, (%rdi,%rsi,8) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: fdiv_array: +; X64-AVX: # %bb.0: # %bb +; X64-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; X64-AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0 +; X64-AVX-NEXT: vmovsd %xmm0, (%rdi,%rsi,8) +; X64-AVX-NEXT: retq +bb: + %tmp4 = getelementptr inbounds i64, ptr %arg, i64 %arg2 + %tmp6 = load atomic i64, ptr %tmp4 monotonic, align 8 + %tmp7 = bitcast i64 %tmp6 to double + %tmp8 = fdiv double %tmp7, %arg1 + %tmp9 = bitcast double %tmp8 to i64 + store atomic i64 %tmp9, ptr %tmp4 monotonic, align 8 + ret void +} -- cgit v1.1 From c8708822784e285e151e99eb1d396380ba57100b Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 8 Jun 2024 08:35:25 -0700 Subject: [memprof] Make Version3 officially available (#94837) --- llvm/include/llvm/ProfileData/MemProf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index a650149..53ddfd1 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -28,7 +28,8 @@ enum IndexedVersion : uint64_t { Version1 = 1, // Version 2: Added a call stack table. Version2 = 2, - // Version 3: Under development. + // Version 3: Added a radix tree for call stacks. Switched to linear IDs for + // frames and call stacks. 
Version3 = 3, }; -- cgit v1.1 From 38124fef7ec70cf73f3b86a03a287a88a16d7926 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 8 Jun 2024 08:36:42 -0700 Subject: [ProfileData] Use a range-based for loop (NFC) (#94856) While I am at it, this patch adds const to a couple of places. --- llvm/lib/ProfileData/InstrProfReader.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 7758363..27855bf 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -145,11 +145,11 @@ readBinaryIdsInternal(const MemoryBuffer &DataBuffer, static void printBinaryIdsInternal(raw_ostream &OS, - std::vector &BinaryIds) { + const std::vector &BinaryIds) { OS << "Binary IDs: \n"; - for (auto BI : BinaryIds) { - for (uint64_t I = 0; I < BI.size(); I++) - OS << format("%02x", BI[I]); + for (const auto &BI : BinaryIds) { + for (auto I : BI) + OS << format("%02x", I); OS << "\n"; } } -- cgit v1.1 From 6834e6ddd07a0a23b71dc4f1c1fdc3e9d35ceb41 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 8 Jun 2024 08:38:36 -0700 Subject: [memprof] Remove redundant virtual (NFC) (#94858) 'override' makes 'virtual' redundant. Identified with modernize-use-override. --- llvm/include/llvm/ProfileData/MemProfReader.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h index f028682..fbba6483 100644 --- a/llvm/include/llvm/ProfileData/MemProfReader.h +++ b/llvm/include/llvm/ProfileData/MemProfReader.h @@ -137,7 +137,7 @@ class RawMemProfReader final : public MemProfReader { public: RawMemProfReader(const RawMemProfReader &) = delete; RawMemProfReader &operator=(const RawMemProfReader &) = delete; - virtual ~RawMemProfReader() override = default; + ~RawMemProfReader() override = default; // Prints the contents of the profile in YAML format. void printYAML(raw_ostream &OS); @@ -161,7 +161,7 @@ public: // Returns a list of build ids recorded in the segment information. static std::vector peekBuildIds(MemoryBuffer *DataBuffer); - virtual Error + Error readNextRecord(GuidMemProfRecordPair &GuidRecord, std::function Callback) override; -- cgit v1.1 From c8992fb7bf123345911c467a34dc3690aac0933d Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Sat, 8 Jun 2024 17:49:53 +0200 Subject: [libc++][NFC] Simplify the implementation of `__promote` (#81379) This depends on enabling the use of extensions. 
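To make the new approach easier to follow, here is a minimal, standalone sketch of the
overload-resolution idiom the simplified `__promote` relies on (illustrative only; the
names and the overload set are simplified relative to the actual libc++ header):

```cpp
// Sketch of the promotion idiom: each test() overload maps one argument
// type to its promoted type, and a C++17 fold over operator+ combines the
// promoted types of the whole pack.
#include <type_traits>

template <class... Args>
class promote_sketch {
  static_assert((std::is_arithmetic<Args>::value && ...));

  static float test(float);
  static double test(char);
  static double test(int);
  static double test(unsigned);
  static double test(long);
  static double test(unsigned long);
  static double test(long long);
  static double test(unsigned long long);
  static double test(double);
  static long double test(long double);

public:
  using type = decltype((test(Args()) + ...));
};

// The usual arithmetic conversions do the rest: every integral argument maps
// to double, so mixing float with an integer promotes the result to double.
static_assert(std::is_same_v<promote_sketch<float, float>::type, float>);
static_assert(std::is_same_v<promote_sketch<float, int>::type, double>);
static_assert(std::is_same_v<promote_sketch<float, long double>::type, long double>);
```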
--- libcxx/include/__type_traits/promote.h | 42 ++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/libcxx/include/__type_traits/promote.h b/libcxx/include/__type_traits/promote.h index e22b4a4..2b2a684 100644 --- a/libcxx/include/__type_traits/promote.h +++ b/libcxx/include/__type_traits/promote.h @@ -11,8 +11,12 @@ #include <__config> #include <__type_traits/integral_constant.h> -#include <__type_traits/is_same.h> -#include <__utility/declval.h> +#include <__type_traits/is_arithmetic.h> + +#if defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER == 1700 +# include <__type_traits/is_same.h> +# include <__utility/declval.h> +#endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -20,6 +24,34 @@ _LIBCPP_BEGIN_NAMESPACE_STD +// TODO(LLVM-20): Remove this workaround +#if !defined(_LIBCPP_CLANG_VER) || _LIBCPP_CLANG_VER != 1700 + +template +class __promote { + static_assert((is_arithmetic<_Args>::value && ...)); + + static float __test(float); + static double __test(char); + static double __test(int); + static double __test(unsigned); + static double __test(long); + static double __test(unsigned long); + static double __test(long long); + static double __test(unsigned long long); +# ifndef _LIBCPP_HAS_NO_INT128 + static double __test(__int128_t); + static double __test(__uint128_t); +# endif + static double __test(double); + static long double __test(long double); + +public: + using type = decltype((__test(_Args()) + ...)); +}; + +#else + template struct __numeric_type { static void __test(...); @@ -31,10 +63,10 @@ struct __numeric_type { static double __test(unsigned long); static double __test(long long); static double __test(unsigned long long); -#ifndef _LIBCPP_HAS_NO_INT128 +# ifndef _LIBCPP_HAS_NO_INT128 static double __test(__int128_t); static double __test(__uint128_t); -#endif +# endif static double __test(double); static long double __test(long double); @@ -89,6 +121,8 @@ public: template class __promote : public __promote_imp<_A1, _A2, _A3> {}; +#endif // !defined(_LIBCPP_CLANG_VER) || _LIBCPP_CLANG_VER >= 1700 + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___TYPE_TRAITS_PROMOTE_H -- cgit v1.1 From bafff3e5316a33d6f016f6e9d6eb02020ad603b6 Mon Sep 17 00:00:00 2001 From: Sam Elliott Date: Sat, 8 Jun 2024 17:53:48 +0100 Subject: [RISCV][MC] Implicit 0-offset aliases for JR/JALR (#94688) This broadly follows how in almost all places, we accept `()` to mean `0()`, but I think these are the first like this for Jumps rather than Loads/Stores. 
These are accepted by binutils but not by LLVM: https://godbolt.org/z/GK7MGE7q7 --- llvm/lib/Target/RISCV/RISCVInstrInfo.td | 3 +++ llvm/test/MC/RISCV/rvi-aliases-valid.s | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 010e07f..4cdf08a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -943,6 +943,9 @@ def : InstAlias<"ret", (JALR X0, X1, 0), 4>; def : InstAlias<"jr $rs, $offset", (JALR X0, GPR:$rs, simm12:$offset), 0>; def : InstAlias<"jalr $rs, $offset", (JALR X1, GPR:$rs, simm12:$offset), 0>; def : InstAlias<"jalr $rd, $rs, $offset", (JALR GPR:$rd, GPR:$rs, simm12:$offset), 0>; +def : InstAlias<"jr (${rs})", (JALR X0, GPR:$rs, 0), 0>; +def : InstAlias<"jalr (${rs})", (JALR X1, GPR:$rs, 0), 0>; +def : InstAlias<"jalr $rd, (${rs})", (JALR GPR:$rd, GPR:$rs, 0), 0>; def : InstAlias<"fence", (FENCE 0xF, 0xF)>; // 0xF == iorw diff --git a/llvm/test/MC/RISCV/rvi-aliases-valid.s b/llvm/test/MC/RISCV/rvi-aliases-valid.s index 098d5c1..9ac6a8a 100644 --- a/llvm/test/MC/RISCV/rvi-aliases-valid.s +++ b/llvm/test/MC/RISCV/rvi-aliases-valid.s @@ -190,6 +190,16 @@ jalr x25, x26, 11 # CHECK-S-OBJ-NOALIAS: jalr zero, 0(ra) # CHECK-S-OBJ: ret ret +# CHECK-S-OBJ-NOALIAS: jalr zero, 0(s11) +# CHECK-S-OBJ: jr s11 +jr (x27) +# CHECK-S-OBJ-NOALIAS: jalr ra, 0(t3) +# CHECK-S-OBJ: jalr t3 +jalr (x28) +# CHECK-S-OBJ-NOALIAS: jalr t4, 0(t5) +# CHECK-S-OBJ: jalr t4, t5 +jalr x29, (x30) + # TODO call # TODO tail -- cgit v1.1 From 80d00bf811cc3361c78e55243f8c5aa8deffdb0a Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 8 Jun 2024 10:00:41 -0700 Subject: [ProfileData] Use default member initialization (NFC) (#94860) Identified with modernize-use-default-member-init. --- llvm/include/llvm/ProfileData/InstrProfReader.h | 4 ++-- llvm/include/llvm/ProfileData/SampleProfWriter.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index 34dba87..f89e6ea 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -708,7 +708,7 @@ private: const uint8_t *BinaryIdsStart = nullptr; // Index to the current record in the record array. - unsigned RecordIndex; + unsigned RecordIndex = 0; // Read the profile summary. Return a pointer pointing to one byte past the // end of the summary data if it exists or the input \c Cur. @@ -721,7 +721,7 @@ public: std::unique_ptr DataBuffer, std::unique_ptr RemappingBuffer = nullptr) : DataBuffer(std::move(DataBuffer)), - RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {} + RemappingBuffer(std::move(RemappingBuffer)) {} IndexedInstrProfReader(const IndexedInstrProfReader &) = delete; IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h index 963a4d4..5398a44 100644 --- a/llvm/include/llvm/ProfileData/SampleProfWriter.h +++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -169,7 +169,7 @@ public: protected: SampleProfileWriterText(std::unique_ptr &OS) - : SampleProfileWriter(OS), Indent(0) {} + : SampleProfileWriter(OS) {} std::error_code writeHeader(const SampleProfileMap &ProfileMap) override { LineCount = 0; @@ -180,7 +180,7 @@ private: /// Indent level to use when writing. 
   ///
   /// This is used when printing inlined callees.
-  unsigned Indent;
+  unsigned Indent = 0;
 
   friend ErrorOr<std::unique_ptr<SampleProfileWriter>>
   SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
-- 
cgit v1.1


From 1e92ad41d8ef46fa3c628b05ba8ed481fedc17bd Mon Sep 17 00:00:00 2001
From: Shivam Gupta
Date: Sat, 8 Jun 2024 22:34:40 +0530
Subject: [lldb] Use const reference for range variables to improve
 performance (NFC) (#94840)

Cppcheck recommends using a const reference for range variables in a
for-each loop. This avoids unnecessary copying of elements, improving
performance.

Caught by cppcheck -

lldb/source/API/SBBreakpoint.cpp:717:22: performance: Range variable 'name' should be declared as const reference. [iterateByValue]
lldb/source/API/SBTarget.cpp:1150:15: performance: Range variable 'name' should be declared as const reference. [iterateByValue]
lldb/source/Breakpoint/Breakpoint.cpp:888:26: performance: Range variable 'name' should be declared as const reference. [iterateByValue]
lldb/source/Breakpoint/BreakpointIDList.cpp:262:26: performance: Range variable 'name' should be declared as const reference. [iterateByValue]

Fix #91213
Fix #91217
Fix #91219
Fix #91220
---
 lldb/source/API/SBBreakpoint.cpp            | 2 +-
 lldb/source/API/SBTarget.cpp                | 2 +-
 lldb/source/Breakpoint/Breakpoint.cpp       | 2 +-
 lldb/source/Breakpoint/BreakpointIDList.cpp | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lldb/source/API/SBBreakpoint.cpp b/lldb/source/API/SBBreakpoint.cpp
index f1fb6f9..3d90804 100644
--- a/lldb/source/API/SBBreakpoint.cpp
+++ b/lldb/source/API/SBBreakpoint.cpp
@@ -714,7 +714,7 @@ void SBBreakpoint::GetNames(SBStringList &names) {
         bkpt_sp->GetTarget().GetAPIMutex());
     std::vector<std::string> names_vec;
     bkpt_sp->GetNames(names_vec);
-    for (std::string name : names_vec) {
+    for (const std::string &name : names_vec) {
       names.AppendString(name.c_str());
     }
   }
diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp
index 962ce9b..adb9e64 100644
--- a/lldb/source/API/SBTarget.cpp
+++ b/lldb/source/API/SBTarget.cpp
@@ -1147,7 +1147,7 @@ void SBTarget::GetBreakpointNames(SBStringList &names) {
 
     std::vector<std::string> name_vec;
     target_sp->GetBreakpointNames(name_vec);
-    for (auto name : name_vec)
+    for (const auto &name : name_vec)
       names.AppendString(name.c_str());
   }
 }
diff --git a/lldb/source/Breakpoint/Breakpoint.cpp b/lldb/source/Breakpoint/Breakpoint.cpp
index ae845e9..dc80d43 100644
--- a/lldb/source/Breakpoint/Breakpoint.cpp
+++ b/lldb/source/Breakpoint/Breakpoint.cpp
@@ -885,7 +885,7 @@ void Breakpoint::GetDescription(Stream *s, lldb::DescriptionLevel level,
     s->Printf("Names:");
     s->EOL();
     s->IndentMore();
-    for (std::string name : m_name_list) {
+    for (const std::string &name : m_name_list) {
       s->Indent();
       s->Printf("%s\n", name.c_str());
     }
diff --git a/lldb/source/Breakpoint/BreakpointIDList.cpp b/lldb/source/Breakpoint/BreakpointIDList.cpp
index 97af1d4..5fc9f95 100644
--- a/lldb/source/Breakpoint/BreakpointIDList.cpp
+++ b/lldb/source/Breakpoint/BreakpointIDList.cpp
@@ -259,7 +259,7 @@ llvm::Error BreakpointIDList::FindAndReplaceIDRanges(
 
   if (!names_found.empty()) {
     for (BreakpointSP bkpt_sp : target->GetBreakpointList().Breakpoints()) {
-      for (std::string name : names_found) {
+      for (const std::string &name : names_found) {
        if (bkpt_sp->MatchesName(name.c_str())) {
          StreamString canonical_id_str;
          BreakpointID::GetCanonicalReference(
-- 
cgit v1.1


From 263be9fb0085001630e0431782a0ac0da7ab58ae Mon Sep 17 00:00:00 2001
From: Job Henandez Lara
Date: Sat, 8 Jun 2024 12:07:27 -0700
Subject: [libc][math][c23] fmul correctly rounded
to all rounding modes (#91537) This is an implementation of floating point multiplication: It will consist of - `double x double -> float` --- libc/config/linux/aarch64/entrypoints.txt | 1 + libc/config/linux/arm/entrypoints.txt | 1 + libc/config/linux/riscv/entrypoints.txt | 1 + libc/config/linux/x86_64/entrypoints.txt | 1 + libc/config/windows/entrypoints.txt | 1 + libc/docs/math/index.rst | 2 +- libc/spec/stdc.td | 3 + libc/src/math/CMakeLists.txt | 2 + libc/src/math/fmul.h | 18 +++++ libc/src/math/generic/CMakeLists.txt | 16 ++++ libc/src/math/generic/fmul.cpp | 128 ++++++++++++++++++++++++++++++ libc/test/src/math/smoke/CMakeLists.txt | 13 +++ libc/test/src/math/smoke/FMulTest.h | 104 ++++++++++++++++++++++++ libc/test/src/math/smoke/fmul_test.cpp | 13 +++ 14 files changed, 303 insertions(+), 1 deletion(-) create mode 100644 libc/src/math/fmul.h create mode 100644 libc/src/math/generic/fmul.cpp create mode 100644 libc/test/src/math/smoke/FMulTest.h create mode 100644 libc/test/src/math/smoke/fmul_test.cpp diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 33ecff8..2265eb0 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -394,6 +394,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.fminimum_mag_num libc.src.math.fminimum_mag_numf libc.src.math.fminimum_mag_numl + libc.src.math.fmul libc.src.math.fmod libc.src.math.fmodf libc.src.math.fmodl diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt index 335981f..d4f9324 100644 --- a/libc/config/linux/arm/entrypoints.txt +++ b/libc/config/linux/arm/entrypoints.txt @@ -261,6 +261,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.fminimum_mag_num libc.src.math.fminimum_mag_numf libc.src.math.fminimum_mag_numl + libc.src.math.fmul libc.src.math.fmod libc.src.math.fmodf libc.src.math.frexp diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index 479af40..67abf851 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -402,6 +402,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.fminimum_mag_num libc.src.math.fminimum_mag_numf libc.src.math.fminimum_mag_numl + libc.src.math.fmul libc.src.math.fmod libc.src.math.fmodf libc.src.math.fmodl diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 1beca7e..a72bc98 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -421,6 +421,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.fminimum_mag_num libc.src.math.fminimum_mag_numf libc.src.math.fminimum_mag_numl + libc.src.math.fmul libc.src.math.fmod libc.src.math.fmodf libc.src.math.fmodl diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt index 7121653..a489872 100644 --- a/libc/config/windows/entrypoints.txt +++ b/libc/config/windows/entrypoints.txt @@ -180,6 +180,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.fminimum_mag_num libc.src.math.fminimum_mag_numf libc.src.math.fminimum_mag_numl + libc.src.math.fmul libc.src.math.fmod libc.src.math.fmodf libc.src.math.fmodl diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index 24b88a52..9d5edd4 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -158,7 +158,7 @@ Basic Operations 
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | fmod | |check| | |check| | |check| | |check| | |check| | 7.12.10.1 | F.10.7.1 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| fmul | N/A | | | N/A | | 7.12.14.3 | F.10.11 | +| fmul | N/A | |check| | | N/A | | 7.12.14.3 | F.10.11 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | frexp | |check| | |check| | |check| | | |check| | 7.12.6.7 | F.10.3.7 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 6443914..d629371 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -472,6 +472,9 @@ def StdC : StandardSpec<"stdc"> { GuardedFunctionSpec<"fminimum_mag_numf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, GuardedFunctionSpec<"fminimum_mag_numf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, + FunctionSpec<"fmul", RetValSpec, [ArgSpec, ArgSpec]>, + + FunctionSpec<"fma", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, FunctionSpec<"fmaf", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 141f668..e48d60d 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -180,6 +180,8 @@ add_math_entrypoint_object(fminimum_mag_numl) add_math_entrypoint_object(fminimum_mag_numf16) add_math_entrypoint_object(fminimum_mag_numf128) +add_math_entrypoint_object(fmul) + add_math_entrypoint_object(fmod) add_math_entrypoint_object(fmodf) add_math_entrypoint_object(fmodl) diff --git a/libc/src/math/fmul.h b/libc/src/math/fmul.h new file mode 100644 index 0000000..fbc1069 --- /dev/null +++ b/libc/src/math/fmul.h @@ -0,0 +1,18 @@ +//===-- Implementation header for fmul --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_FMUL_H +#define LLVM_LIBC_SRC_MATH_FMUL_H + +namespace LIBC_NAMESPACE { + +float fmul(double x, double y); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_MATH_FMUL_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 9c9073c..8a916e7 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -2355,6 +2355,22 @@ add_entrypoint_object( ) add_entrypoint_object( + fmul + SRCS + fmul.cpp + HDRS + ../fmul.h + DEPENDS + libc.src.__support.FPUtil.basic_operations + libc.src.__support.uint128 + libc.src.__support.CPP.bit + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.rounding_mode + COMPILE_OPTIONS + -O3 +) + +add_entrypoint_object( sqrt SRCS sqrt.cpp diff --git a/libc/src/math/generic/fmul.cpp b/libc/src/math/generic/fmul.cpp new file mode 100644 index 0000000..40af204 --- /dev/null +++ b/libc/src/math/generic/fmul.cpp @@ -0,0 +1,128 @@ +//===-- Implementation of fmul function------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/fmul.h" +#include "src/__support/CPP/bit.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/common.h" +#include "src/__support/uint128.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(float, fmul, (double x, double y)) { + auto x_bits = fputil::FPBits(x); + + auto y_bits = fputil::FPBits(y); + + auto output_sign = (x_bits.sign() != y_bits.sign()) ? Sign::NEG : Sign::POS; + + if (LIBC_UNLIKELY(x_bits.is_inf_or_nan() || y_bits.is_inf_or_nan() || + x_bits.is_zero() || y_bits.is_zero())) { + if (x_bits.is_nan()) + return static_cast(x); + if (y_bits.is_nan()) + return static_cast(y); + if (x_bits.is_inf()) + return y_bits.is_zero() + ? fputil::FPBits::quiet_nan().get_val() + : fputil::FPBits::inf(output_sign).get_val(); + if (y_bits.is_inf()) + return x_bits.is_zero() + ? fputil::FPBits::quiet_nan().get_val() + : fputil::FPBits::inf(output_sign).get_val(); + // Now either x or y is zero, and the other one is finite. + return fputil::FPBits::zero(output_sign).get_val(); + } + + uint64_t mx, my; + + // Get mantissa and append the hidden bit if needed. + mx = x_bits.get_explicit_mantissa(); + my = y_bits.get_explicit_mantissa(); + + // Get the corresponding biased exponent. + int ex = x_bits.get_explicit_exponent(); + int ey = y_bits.get_explicit_exponent(); + + // Count the number of leading zeros of the explicit mantissas. + int nx = cpp::countl_zero(mx); + int ny = cpp::countl_zero(my); + // Shift the leading 1 bit to the most significant bit. + mx <<= nx; + my <<= ny; + + // Adjust exponent accordingly: If x or y are normal, we will only need to + // shift by (exponent length + sign bit = 11 bits. If x or y are denormal, we + // will need to shift more than 11 bits. 
+ ex -= (nx - 11); + ey -= (ny - 11); + + UInt128 product = static_cast(mx) * static_cast(my); + int32_t dm1; + uint64_t highs, lows; + uint64_t g, hight, lowt; + uint32_t m; + uint32_t b; + int c; + + highs = static_cast(product >> 64); + c = static_cast(highs >= 0x8000000000000000); + lows = static_cast(product); + + lowt = (lows != 0); + + dm1 = ex + ey + c + fputil::FPBits::EXP_BIAS; + + int round_mode = fputil::quick_get_round(); + if (dm1 >= 255) { + if ((round_mode == FE_TOWARDZERO) || + (round_mode == FE_UPWARD && output_sign.is_neg()) || + (round_mode == FE_DOWNWARD && output_sign.is_pos())) { + return fputil::FPBits::max_normal(output_sign).get_val(); + } + return fputil::FPBits::inf().get_val(); + } else if (dm1 <= 0) { + + int m_shift = 40 + c - dm1; + int g_shift = m_shift - 1; + int h_shift = 64 - g_shift; + m = (m_shift >= 64) ? 0 : static_cast(highs >> m_shift); + + g = g_shift >= 64 ? 0 : (highs >> g_shift) & 1; + hight = h_shift >= 64 ? highs : (highs << h_shift) != 0; + + dm1 = 0; + } else { + m = static_cast(highs >> (39 + c)); + g = (highs >> (38 + c)) & 1; + hight = (highs << (26 - c)) != 0; + } + + if (round_mode == FE_TONEAREST) { + b = g && ((hight && lowt) || ((m & 1) != 0)); + } else if ((output_sign.is_neg() && round_mode == FE_DOWNWARD) || + (output_sign.is_pos() && round_mode == FE_UPWARD)) { + b = (g == 0 && (hight && lowt) == 0) ? 0 : 1; + } else { + b = 0; + } + + uint32_t exp16 = (dm1 << 23); + + uint32_t m2 = m & fputil::FPBits::FRACTION_MASK; + + uint32_t result = (exp16 + m2) + b; + + auto result_bits = fputil::FPBits(result); + result_bits.set_sign(output_sign); + return result_bits.get_val(); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index 07e8b5d..8d5b4e6 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -2401,6 +2401,19 @@ add_fp_unittest( ) add_fp_unittest( + fmul_test + SUITE + libc-math-smoke-tests + SRCS + fmul_test.cpp + HDRS + FMulTest.h + DEPENDS + libc.src.math.fmul + libc.src.__support.FPUtil.fp_bits +) + +add_fp_unittest( sqrtf_test SUITE libc-math-smoke-tests diff --git a/libc/test/src/math/smoke/FMulTest.h b/libc/test/src/math/smoke/FMulTest.h new file mode 100644 index 0000000..33fb82c --- /dev/null +++ b/libc/test/src/math/smoke/FMulTest.h @@ -0,0 +1,104 @@ +//===-- Utility class to test fmul[f|l] ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMULTEST_H +#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMULTEST_H + +#include "test/UnitTest/FEnvSafeTest.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +template +class FmulTest : public LIBC_NAMESPACE::testing::FEnvSafeTest { + + DECLARE_SPECIAL_CONSTANTS(T) + +public: + typedef T (*FMulFunc)(R, R); + + void testMul(FMulFunc func) { + + EXPECT_FP_EQ_ALL_ROUNDING(T(15.0), func(3.0, 5.0)); + EXPECT_FP_EQ_ALL_ROUNDING(T(0x1.0p-130), func(0x1.0p1, 0x1.0p-131)); + EXPECT_FP_EQ_ALL_ROUNDING(T(0x1.0p-127), func(0x1.0p2, 0x1.0p-129)); + EXPECT_FP_EQ_ALL_ROUNDING(T(1.0), func(1.0, 1.0)); + + EXPECT_FP_EQ_ALL_ROUNDING(T(0.0), func(-0.0, -0.0)); + EXPECT_FP_EQ_ALL_ROUNDING(T(-0.0), func(0.0, -0.0)); + EXPECT_FP_EQ_ALL_ROUNDING(T(-0.0), func(-0.0, 0.0)); + + EXPECT_FP_EQ_ROUNDING_NEAREST(inf, func(0x1.0p100, 0x1.0p100)); + EXPECT_FP_EQ_ROUNDING_UPWARD(inf, func(0x1.0p100, 0x1.0p100)); + EXPECT_FP_EQ_ROUNDING_DOWNWARD(max_normal, func(0x1.0p100, 0x1.0p100)); + EXPECT_FP_EQ_ROUNDING_TOWARD_ZERO(max_normal, func(0x1.0p100, 0x1.0p100)); + + EXPECT_FP_EQ_ROUNDING_NEAREST( + 0x1p0, func(1.0, 1.0 + 0x1.0p-128 + 0x1.0p-149 + 0x1.0p-150)); + EXPECT_FP_EQ_ROUNDING_DOWNWARD( + 0x1p0, func(1.0, 1.0 + 0x1.0p-128 + 0x1.0p-149 + 0x1.0p-150)); + EXPECT_FP_EQ_ROUNDING_TOWARD_ZERO( + 0x1p0, func(1.0, 1.0 + 0x1.0p-128 + 0x1.0p-149 + 0x1.0p-150)); + EXPECT_FP_EQ_ROUNDING_UPWARD( + 0x1p0, func(1.0, 1.0 + 0x1.0p-128 + 0x1.0p-149 + 0x1.0p-150)); + + EXPECT_FP_EQ_ROUNDING_NEAREST( + 0x1.0p-128f + 0x1.0p-148f, + func(1.0, 0x1.0p-128 + 0x1.0p-149 + 0x1.0p-150)); + EXPECT_FP_EQ_ROUNDING_UPWARD( + 0x1.0p-128f + 0x1.0p-148f, + func(1.0, 0x1.0p-128 + 0x1.0p-149 + 0x1.0p-150)); + EXPECT_FP_EQ_ROUNDING_DOWNWARD( + 0x1.0p-128f + 0x1.0p-149f, + func(1.0, 0x1.0p-128 + 0x1.0p-149 + 0x1.0p-150)); + EXPECT_FP_EQ_ROUNDING_TOWARD_ZERO( + 0x1.0p-128f + 0x1.0p-149f, + func(1.0, 0x1.0p-128 + 0x1.0p-149 + 0x1.0p-150)); + } + + void testSpecialInputs(FMulFunc func) { + EXPECT_FP_EQ_ALL_ROUNDING(inf, func(inf, 0x1.0p-129)); + EXPECT_FP_EQ_ALL_ROUNDING(inf, func(0x1.0p-129, inf)); + EXPECT_FP_EQ_ALL_ROUNDING(inf, func(inf, 2.0)); + EXPECT_FP_EQ_ALL_ROUNDING(inf, func(3.0, inf)); + EXPECT_FP_EQ_ALL_ROUNDING(0.0, func(0.0, 0.0)); + + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, func(neg_inf, aNaN)); + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, func(aNaN, neg_inf)); + EXPECT_FP_EQ_ALL_ROUNDING(inf, func(neg_inf, neg_inf)); + + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, func(0.0, neg_inf)); + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, func(neg_inf, 0.0)); + + EXPECT_FP_EQ_ALL_ROUNDING(neg_inf, func(neg_inf, 1.0)); + EXPECT_FP_EQ_ALL_ROUNDING(neg_inf, func(1.0, neg_inf)); + + EXPECT_FP_EQ_ALL_ROUNDING(neg_inf, func(neg_inf, 0x1.0p-129)); + EXPECT_FP_EQ_ALL_ROUNDING(neg_inf, func(0x1.0p-129, neg_inf)); + + EXPECT_FP_EQ_ALL_ROUNDING(0.0, func(0.0, 0x1.0p-129)); + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, func(inf, 0.0)); + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, func(0.0, inf)); + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, func(0.0, aNaN)); + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, func(2.0, aNaN)); + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, func(0x1.0p-129, aNaN)); + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, func(inf, aNaN)); + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, func(aNaN, aNaN)); + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, func(0.0, sNaN)); + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, func(2.0, sNaN)); + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, func(0x1.0p-129, sNaN)); + 
EXPECT_FP_EQ_ALL_ROUNDING(aNaN, func(inf, sNaN)); + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, func(sNaN, sNaN)); + } +}; + +#define LIST_FMUL_TESTS(T, R, func) \ + using LlvmLibcFmulTest = FmulTest; \ + TEST_F(LlvmLibcFmulTest, Mul) { testMul(&func); } \ + TEST_F(LlvmLibcFmulTest, NaNInf) { testSpecialInputs(&func); } + +#endif // LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMULTEST_H diff --git a/libc/test/src/math/smoke/fmul_test.cpp b/libc/test/src/math/smoke/fmul_test.cpp new file mode 100644 index 0000000..0eb664f --- /dev/null +++ b/libc/test/src/math/smoke/fmul_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for fmul-------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#include "FMulTest.h" + +#include "src/math/fmul.h" + +LIST_FMUL_TESTS(float, double, LIBC_NAMESPACE::fmul) -- cgit v1.1 From 44aecca020317fc163fc6f253b03d07828f1d231 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hendrik=20H=C3=BCbner?= <117831077+HendrikHuebner@users.noreply.github.com> Date: Sat, 8 Jun 2024 21:08:45 +0200 Subject: [libc][math][C23] Implemented remquof128 function (#94809) Added remquof128 function. Closes #94312 --- libc/config/linux/aarch64/entrypoints.txt | 1 + libc/config/linux/x86_64/entrypoints.txt | 1 + libc/docs/math/index.rst | 2 +- libc/spec/stdc.td | 1 + libc/src/math/CMakeLists.txt | 1 + libc/src/math/generic/CMakeLists.txt | 12 ++++++++++++ libc/src/math/generic/remquof128.cpp | 19 +++++++++++++++++++ libc/src/math/remquof128.h | 20 ++++++++++++++++++++ libc/test/src/math/smoke/CMakeLists.txt | 14 ++++++++++++++ libc/test/src/math/smoke/remquof128_test.cpp | 13 +++++++++++++ 10 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 libc/src/math/generic/remquof128.cpp create mode 100644 libc/src/math/remquof128.h create mode 100644 libc/test/src/math/smoke/remquof128_test.cpp diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 2265eb0..adfc1d2 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -575,6 +575,7 @@ if(LIBC_TYPES_HAS_FLOAT128) libc.src.math.nextafterf128 libc.src.math.nextdownf128 libc.src.math.nextupf128 + libc.src.math.remquof128 libc.src.math.rintf128 libc.src.math.roundf128 libc.src.math.scalbnf128 diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index a72bc98..e86274a 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -606,6 +606,7 @@ if(LIBC_TYPES_HAS_FLOAT128) libc.src.math.nextafterf128 libc.src.math.nextdownf128 libc.src.math.nextupf128 + libc.src.math.remquof128 libc.src.math.rintf128 libc.src.math.roundevenf128 libc.src.math.roundf128 diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index 9d5edd4..ee2e764 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -200,7 +200,7 @@ Basic Operations +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | remainder | |check| | |check| | |check| | | | 7.12.10.2 | F.10.7.2 | 
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| remquo | |check| | |check| | |check| | | | 7.12.10.3 | F.10.7.3 | +| remquo | |check| | |check| | |check| | | |check| | 7.12.10.3 | F.10.7.3 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | rint | |check| | |check| | |check| | |check| | |check| | 7.12.9.4 | F.10.6.4 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index d629371..38521f5 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -581,6 +581,7 @@ def StdC : StandardSpec<"stdc"> { FunctionSpec<"remainderl", RetValSpec, [ArgSpec, ArgSpec]>, FunctionSpec<"remquof", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, + GuardedFunctionSpec<"remquof128", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, FunctionSpec<"remquo", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, FunctionSpec<"remquol", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index e48d60d..e6cb8fd 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -312,6 +312,7 @@ add_math_entrypoint_object(remainderl) add_math_entrypoint_object(remquo) add_math_entrypoint_object(remquof) +add_math_entrypoint_object(remquof128) add_math_entrypoint_object(remquol) add_math_entrypoint_object(rint) diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 8a916e7..9774083 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -2433,6 +2433,18 @@ add_entrypoint_object( ) add_entrypoint_object( + remquof128 + SRCS + remquof128.cpp + HDRS + ../remquof128.h + DEPENDS + libc.src.__support.FPUtil.division_and_remainder_operations + COMPILE_OPTIONS + -O3 +) + +add_entrypoint_object( remquo SRCS remquo.cpp diff --git a/libc/src/math/generic/remquof128.cpp b/libc/src/math/generic/remquof128.cpp new file mode 100644 index 0000000..e195c7b --- /dev/null +++ b/libc/src/math/generic/remquof128.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of remquof128 function -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/remquof128.h" +#include "src/__support/FPUtil/DivisionAndRemainderOperations.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(float128, remquof128, (float128 x, float128 y, int *exp)) { + return fputil::remquo(x, y, *exp); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/math/remquof128.h b/libc/src/math/remquof128.h new file mode 100644 index 0000000..e9db1ef --- /dev/null +++ b/libc/src/math/remquof128.h @@ -0,0 +1,20 @@ +//===-- Implementation header for remquof128 --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_REMQUOF128_H
+#define LLVM_LIBC_SRC_MATH_REMQUOF128_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float128 remquof128(float128 x, float128 y, int *exp);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_REMQUOF128_H
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index 8d5b4e6..dfda1bc 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -2528,6 +2528,20 @@ add_fp_unittest(
 )
 
 add_fp_unittest(
+  remquof128_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    remquof128_test.cpp
+  HDRS
+    RemQuoTest.h
+  DEPENDS
+    libc.src.math.remquof128
+    libc.src.__support.FPUtil.basic_operations
+    libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
   remquo_test
   SUITE
     libc-math-smoke-tests
diff --git a/libc/test/src/math/smoke/remquof128_test.cpp b/libc/test/src/math/smoke/remquof128_test.cpp
new file mode 100644
index 0000000..8ef6c3b
--- /dev/null
+++ b/libc/test/src/math/smoke/remquof128_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for remquof128 ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "RemQuoTest.h"
+
+#include "src/math/remquof128.h"
+
+LIST_REMQUO_TESTS(float128, LIBC_NAMESPACE::remquof128)
-- 
cgit v1.1


From a43d999d1476c8ab549fb9039dccb03217ee1053 Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Sat, 8 Jun 2024 20:30:14 +0100
Subject: [VPlan] Check if only first part is used for all per-part VPInsts.

Apply the onlyFirstPartUsed logic generally to all per-part
VPInstructions.

Note that the test changes remove the second part of an unused
first-order recurrence splice.
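For readers skimming the diff below, a hedged sketch of the control flow this
generalizes (simplified stand-in types, not the actual VPlan interfaces):

```cpp
// Sketch: during unrolling, each VPInstruction is generated once per unroll
// part. When later parts are never read, part 0's value is reused instead of
// emitting redundant IR; this shortcut used to apply only to binary ops.
#include <functional>
#include <vector>

struct State {
  std::vector<int> PerPart; // stand-in for the per-part generated values
  int get(unsigned Part) const { return PerPart[Part]; }
  void set(unsigned Part, int V) { PerPart[Part] = V; }
};

void execute(State &S, unsigned UF, bool OnlyFirstPartUsed,
             const std::function<int(unsigned)> &GeneratePerPart) {
  S.PerPart.assign(UF, 0);
  for (unsigned Part = 0; Part != UF; ++Part) {
    if (Part != 0 && OnlyFirstPartUsed) {
      S.set(Part, S.get(0)); // later parts just alias part 0
      continue;
    }
    S.set(Part, GeneratePerPart(Part));
  }
}
```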
--- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 11 ++++++++--- llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll | 2 -- .../Transforms/LoopVectorize/X86/fixed-order-recurrence.ll | 1 - llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll | 6 ------ .../LoopVectorize/scalable-first-order-recurrence.ll | 1 - 5 files changed, 8 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index cb707d7..738bb67 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -324,9 +324,6 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { if (Instruction::isBinaryOp(getOpcode())) { bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); - if (Part != 0 && vputils::onlyFirstPartUsed(this)) - return State.get(this, 0, OnlyFirstLaneUsed); - Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed); Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed); auto *Res = @@ -628,6 +625,7 @@ void VPInstruction::execute(VPTransformState &State) { canGenerateScalarForFirstLane() && (vputils::onlyFirstLaneUsed(this) || isVectorToScalar()); bool GeneratesPerAllLanes = doesGeneratePerAllLanes(); + bool OnlyFirstPartUsed = vputils::onlyFirstPartUsed(this); for (unsigned Part = 0; Part < State.UF; ++Part) { if (GeneratesPerAllLanes) { for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue(); @@ -639,6 +637,13 @@ void VPInstruction::execute(VPTransformState &State) { continue; } + if (Part != 0 && OnlyFirstPartUsed && hasResult()) { + Value *Part0 = State.get(this, 0, /*IsScalar*/ GeneratesPerFirstLaneOnly); + State.set(this, Part0, Part, + /*IsScalar*/ GeneratesPerFirstLaneOnly); + continue; + } + Value *GeneratedValue = generatePerPart(State, Part); if (!hasResult()) continue; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll index 32dc2ec..4b6eb66 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -121,7 +121,6 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) { ; CHECK-NEXT: [[TMP8:%.*]] = zext <2 x i32> [[WIDE_LOAD]] to <2 x i64> ; CHECK-NEXT: [[TMP9]] = zext <2 x i32> [[WIDE_LOAD4]] to <2 x i64> ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[VECTOR_RECUR]], <2 x i64> [[TMP8]], <2 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP9]], <2 x i32> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -186,7 +185,6 @@ define i64 @int_and_pointer_iv(ptr %start, i32 %N) { ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> ; CHECK-NEXT: [[TMP5]] = zext <4 x i32> [[WIDE_LOAD3]] to <4 x i64> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] diff --git 
a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index 9457500..86b3e54 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -235,7 +235,6 @@ define i64 @test_pr62954_scalar_epilogue_required(ptr %A, ptr noalias %B, ptr %C ; CHECK-NEXT: [[TMP0:%.*]] = sub nsw <16 x i64> zeroinitializer, [[VEC_IND]] ; CHECK-NEXT: [[TMP1]] = sub nsw <16 x i64> zeroinitializer, [[STEP_ADD]] ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i64> [[VECTOR_RECUR]], <16 x i64> [[TMP0]], <16 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i64> [[TMP0]], <16 x i64> [[TMP1]], <16 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i64> [[TMP1]], i32 15 ; CHECK-NEXT: store i64 [[TMP4]], ptr [[GEP]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 6d3654d..864af18 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -912,7 +912,6 @@ define i32 @PR27246() { ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -1121,7 +1120,6 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[VECTOR_RECUR_EXTRACT_FOR_PHI]], i32 2 ; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP41]], i32 [[VECTOR_RECUR_EXTRACT]], i32 3 ; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP34]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = shufflevector <4 x i32> [[TMP34]], <4 x i32> [[TMP42]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP45]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -1393,7 +1391,6 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; UNROLL-NO-IC-NEXT: [[TMP1]] = add <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP0]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 @@ -2572,7 +2569,6 @@ define void @sink_dead_inst(ptr %a) { ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP5]] = zext <4 x i16> [[TMP3]] to <4 x i32> ; 
UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR2]], <4 x i32> [[TMP4]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add <4 x i16> [[TMP2]], ; UNROLL-NO-IC-NEXT: [[TMP9]] = add <4 x i16> [[TMP3]], ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP8]], <4 x i32> @@ -3491,7 +3487,6 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP7]] = zext <4 x i16> [[TMP5]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP6]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[TMP1]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0 @@ -3665,7 +3660,6 @@ define void @unused_recurrence(ptr %a) { ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add <4 x i16> [[TMP0]], ; UNROLL-NO-IC-NEXT: [[TMP3]] = add <4 x i16> [[TMP1]], ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP2]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll index b959894..f9e7c82 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll @@ -172,7 +172,6 @@ define i64 @constant_folded_previous_value() { ; CHECK-VF4UF2: vector.body ; CHECK-VF4UF2: %[[VECTOR_RECUR:.*]] = phi [ %vector.recur.init, %vector.ph ], [ shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), %vector.body ] ; CHECK-VF4UF2: %[[SPLICE1:.*]] = call @llvm.vector.splice.nxv4i64( %vector.recur, shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), i32 -1) -; CHECK-VF4UF2: %[[SPLICE2:.*]] = call @llvm.vector.splice.nxv4i64( shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), i32 -1) ; CHECK-VF4UF2: br i1 {{.*}}, label %middle.block, label %vector.body entry: br label %scalar.body -- cgit v1.1 From 643e4718af3124964cd0f14036bb6fba49e338f7 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 9 Jun 2024 03:44:19 +0800 Subject: [RISCV][GISel] Add calling convention support for half (#94110) This patch adds initial support to the half type on RISC-V. 
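A hedged, self-contained sketch of the two decisions the patch adds
(hypothetical enums, not the actual RISCVCallLowering or
RISCVInstructionSelector interfaces): `half` now passes the argument/return
type check, and a 16-bit value on the FP register bank selects FPR16.

```cpp
#include <cassert>

enum class Scalar { Int32, Half, Float, Double };
enum class FPRegClass { FPR16, FPR32, FPR64 };

// Argument/return gate: before this patch only Float/Double passed here, so
// signatures involving 'half' fell back to SelectionDAG.
bool isSupportedFPType(Scalar S) {
  return S == Scalar::Half || S == Scalar::Float || S == Scalar::Double;
}

// Register-class choice on the FP bank, keyed by bit width; the 16-bit case
// is the newly handled one.
FPRegClass fpRegClassForSize(unsigned Bits) {
  if (Bits == 16)
    return FPRegClass::FPR16;
  if (Bits == 32)
    return FPRegClass::FPR32;
  assert(Bits == 64 && "unexpected FP size");
  return FPRegClass::FPR64;
}

int main() {
  assert(isSupportedFPType(Scalar::Half));
  assert(fpRegClassForSize(16) == FPRegClass::FPR16);
  return 0;
}
```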
--- llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp | 4 +- .../RISCV/GISel/RISCVInstructionSelector.cpp | 2 + .../GlobalISel/irtranslator/calling-conv-half.ll | 516 +++++++++++++++++++++ 3 files changed, 520 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-half.ll diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp index c18892a..beee940 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp @@ -340,7 +340,7 @@ static bool isSupportedArgumentType(Type *T, const RISCVSubtarget &Subtarget, // supported yet. if (T->isIntegerTy()) return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2; - if (T->isFloatTy() || T->isDoubleTy()) + if (T->isHalfTy() || T->isFloatTy() || T->isDoubleTy()) return true; if (T->isPointerTy()) return true; @@ -361,7 +361,7 @@ static bool isSupportedReturnType(Type *T, const RISCVSubtarget &Subtarget, // supported yet. if (T->isIntegerTy()) return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2; - if (T->isFloatTy() || T->isDoubleTy()) + if (T->isHalfTy() || T->isFloatTy() || T->isDoubleTy()) return true; if (T->isPointerTy()) return true; diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index da8daa5..a091380 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -849,6 +849,8 @@ const TargetRegisterClass *RISCVInstructionSelector::getRegClassForTypeOnBank( } if (RB.getID() == RISCV::FPRBRegBankID) { + if (Ty.getSizeInBits() == 16) + return &RISCV::FPR16RegClass; if (Ty.getSizeInBits() == 32) return &RISCV::FPR32RegClass; if (Ty.getSizeInBits() == 64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-half.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-half.ll new file mode 100644 index 0000000..0a0828e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-half.ll @@ -0,0 +1,516 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -global-isel -stop-after=irtranslator < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -mattr=+f -global-isel -stop-after=irtranslator < %s \ +; RUN: | FileCheck -check-prefix=RV32IF %s +; RUN: llc -mtriple=riscv32 -mattr=+zfh -global-isel -stop-after=irtranslator < %s \ +; RUN: | FileCheck -check-prefix=RV32IZFH %s + +define half @callee_half_in_regs(half %x) nounwind { + ; RV32I-LABEL: name: callee_half_in_regs + ; RV32I: bb.1 (%ir-block.0): + ; RV32I-NEXT: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32I-NEXT: $x10 = COPY [[ANYEXT]](s32) + ; RV32I-NEXT: PseudoRET implicit $x10 + ; + ; RV32IF-LABEL: name: callee_half_in_regs + ; RV32IF: bb.1 (%ir-block.0): + ; RV32IF-NEXT: liveins: $f10_f + ; RV32IF-NEXT: {{ $}} + ; RV32IF-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV32IF-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; RV32IF-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32IF-NEXT: $f10_f = COPY [[ANYEXT]](s32) + ; RV32IF-NEXT: PseudoRET implicit $f10_f + ; + ; RV32IZFH-LABEL: name: callee_half_in_regs 
+ ; RV32IZFH: bb.1 (%ir-block.0): + ; RV32IZFH-NEXT: liveins: $f10_h + ; RV32IZFH-NEXT: {{ $}} + ; RV32IZFH-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV32IZFH-NEXT: $f10_h = COPY [[COPY]](s16) + ; RV32IZFH-NEXT: PseudoRET implicit $f10_h + ret half %x +} + +define half @caller_half_in_regs(half %x) nounwind { + ; RV32I-LABEL: name: caller_half_in_regs + ; RV32I: bb.1 (%ir-block.0): + ; RV32I-NEXT: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; RV32I-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32I-NEXT: $x10 = COPY [[ANYEXT]](s32) + ; RV32I-NEXT: PseudoCALL target-flags(riscv-call) @caller_half_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit-def $x10 + ; RV32I-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32I-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; RV32I-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s16) + ; RV32I-NEXT: $x10 = COPY [[ANYEXT1]](s32) + ; RV32I-NEXT: PseudoRET implicit $x10 + ; + ; RV32IF-LABEL: name: caller_half_in_regs + ; RV32IF: bb.1 (%ir-block.0): + ; RV32IF-NEXT: liveins: $f10_f + ; RV32IF-NEXT: {{ $}} + ; RV32IF-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV32IF-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; RV32IF-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV32IF-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32IF-NEXT: $f10_f = COPY [[ANYEXT]](s32) + ; RV32IF-NEXT: PseudoCALL target-flags(riscv-call) @caller_half_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $f10_f, implicit-def $f10_f + ; RV32IF-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV32IF-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV32IF-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; RV32IF-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s16) + ; RV32IF-NEXT: $f10_f = COPY [[ANYEXT1]](s32) + ; RV32IF-NEXT: PseudoRET implicit $f10_f + ; + ; RV32IZFH-LABEL: name: caller_half_in_regs + ; RV32IZFH: bb.1 (%ir-block.0): + ; RV32IZFH-NEXT: liveins: $f10_h + ; RV32IZFH-NEXT: {{ $}} + ; RV32IZFH-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV32IZFH-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV32IZFH-NEXT: $f10_h = COPY [[COPY]](s16) + ; RV32IZFH-NEXT: PseudoCALL target-flags(riscv-call) @caller_half_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $f10_h, implicit-def $f10_h + ; RV32IZFH-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV32IZFH-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV32IZFH-NEXT: $f10_h = COPY [[COPY1]](s16) + ; RV32IZFH-NEXT: PseudoRET implicit $f10_h + %y = call half @caller_half_in_regs(half %x) + ret half %y +} + +define half @callee_half_mixed_with_int(i32 %x0, half %x) nounwind { + ; RV32I-LABEL: name: callee_half_mixed_with_int + ; RV32I: bb.1 (%ir-block.0): + ; RV32I-NEXT: liveins: $x10, $x11 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32I-NEXT: $x10 = COPY [[ANYEXT]](s32) + ; RV32I-NEXT: PseudoRET implicit $x10 + ; + ; RV32IF-LABEL: name: callee_half_mixed_with_int + ; RV32IF: bb.1 (%ir-block.0): 
+ ; RV32IF-NEXT: liveins: $x10, $f10_f + ; RV32IF-NEXT: {{ $}} + ; RV32IF-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32IF-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV32IF-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; RV32IF-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32IF-NEXT: $f10_f = COPY [[ANYEXT]](s32) + ; RV32IF-NEXT: PseudoRET implicit $f10_f + ; + ; RV32IZFH-LABEL: name: callee_half_mixed_with_int + ; RV32IZFH: bb.1 (%ir-block.0): + ; RV32IZFH-NEXT: liveins: $x10, $f10_h + ; RV32IZFH-NEXT: {{ $}} + ; RV32IZFH-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32IZFH-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV32IZFH-NEXT: $f10_h = COPY [[COPY1]](s16) + ; RV32IZFH-NEXT: PseudoRET implicit $f10_h + ret half %x +} + +define half @caller_half_mixed_with_int(half %x, i32 %x0) nounwind { + ; RV32I-LABEL: name: caller_half_mixed_with_int + ; RV32I: bb.1 (%ir-block.0): + ; RV32I-NEXT: liveins: $x10, $x11 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; RV32I-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32I-NEXT: $x10 = COPY [[COPY1]](s32) + ; RV32I-NEXT: $x11 = COPY [[ANYEXT]](s32) + ; RV32I-NEXT: PseudoCALL target-flags(riscv-call) @callee_half_mixed_with_int, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; RV32I-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV32I-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32I-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; RV32I-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s16) + ; RV32I-NEXT: $x10 = COPY [[ANYEXT1]](s32) + ; RV32I-NEXT: PseudoRET implicit $x10 + ; + ; RV32IF-LABEL: name: caller_half_mixed_with_int + ; RV32IF: bb.1 (%ir-block.0): + ; RV32IF-NEXT: liveins: $x10, $f10_f + ; RV32IF-NEXT: {{ $}} + ; RV32IF-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV32IF-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; RV32IF-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32IF-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV32IF-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32IF-NEXT: $x10 = COPY [[COPY1]](s32) + ; RV32IF-NEXT: $f10_f = COPY [[ANYEXT]](s32) + ; RV32IF-NEXT: PseudoCALL target-flags(riscv-call) @callee_half_mixed_with_int, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $f10_f, implicit-def $f10_f + ; RV32IF-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV32IF-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV32IF-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; RV32IF-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s16) + ; RV32IF-NEXT: $f10_f = COPY [[ANYEXT1]](s32) + ; RV32IF-NEXT: PseudoRET implicit $f10_f + ; + ; RV32IZFH-LABEL: name: caller_half_mixed_with_int + ; RV32IZFH: bb.1 (%ir-block.0): + ; RV32IZFH-NEXT: liveins: $x10, $f10_h + ; RV32IZFH-NEXT: {{ $}} + ; RV32IZFH-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV32IZFH-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32IZFH-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV32IZFH-NEXT: $x10 = COPY [[COPY1]](s32) + ; RV32IZFH-NEXT: $f10_h = COPY [[COPY]](s16) + ; RV32IZFH-NEXT: PseudoCALL target-flags(riscv-call) @callee_half_mixed_with_int, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit 
$f10_h, implicit-def $f10_h + ; RV32IZFH-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV32IZFH-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV32IZFH-NEXT: $f10_h = COPY [[COPY2]](s16) + ; RV32IZFH-NEXT: PseudoRET implicit $f10_h + %y = call half @callee_half_mixed_with_int(i32 %x0, half %x) + ret half %y +} + +define half @callee_half_return_stack1(i32 %v1, i32 %v2, i32 %v3, i32 %v4, i32 %v5, i32 %v6, i32 %v7, i32 %v8, half %x) nounwind { + ; RV32I-LABEL: name: callee_half_return_stack1 + ; RV32I: bb.1 (%ir-block.0): + ; RV32I-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; RV32I-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12 + ; RV32I-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13 + ; RV32I-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $x14 + ; RV32I-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $x15 + ; RV32I-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $x16 + ; RV32I-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $x17 + ; RV32I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 16) + ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32I-NEXT: $x10 = COPY [[ANYEXT]](s32) + ; RV32I-NEXT: PseudoRET implicit $x10 + ; + ; RV32IF-LABEL: name: callee_half_return_stack1 + ; RV32IF: bb.1 (%ir-block.0): + ; RV32IF-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $f10_f + ; RV32IF-NEXT: {{ $}} + ; RV32IF-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32IF-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; RV32IF-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12 + ; RV32IF-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13 + ; RV32IF-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $x14 + ; RV32IF-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $x15 + ; RV32IF-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $x16 + ; RV32IF-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $x17 + ; RV32IF-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV32IF-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; RV32IF-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32IF-NEXT: $f10_f = COPY [[ANYEXT]](s32) + ; RV32IF-NEXT: PseudoRET implicit $f10_f + ; + ; RV32IZFH-LABEL: name: callee_half_return_stack1 + ; RV32IZFH: bb.1 (%ir-block.0): + ; RV32IZFH-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $f10_h + ; RV32IZFH-NEXT: {{ $}} + ; RV32IZFH-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32IZFH-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; RV32IZFH-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12 + ; RV32IZFH-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13 + ; RV32IZFH-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $x14 + ; RV32IZFH-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $x15 + ; RV32IZFH-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $x16 + ; RV32IZFH-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $x17 + ; RV32IZFH-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV32IZFH-NEXT: $f10_h = COPY [[COPY8]](s16) + ; RV32IZFH-NEXT: PseudoRET implicit $f10_h + ret half %x +} + +define half @caller_half_return_stack1(i32 %v1, half %x) nounwind { + ; RV32I-LABEL: name: caller_half_return_stack1 + ; RV32I: bb.1 (%ir-block.0): + ; RV32I-NEXT: liveins: $x10, $x11 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY1]](s32) + ; RV32I-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32I-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; RV32I-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; RV32I-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; RV32I-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: ADJCALLSTACKDOWN 4, 0, implicit-def $x2, implicit $x2 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32I-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 + ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C7]](s32) + ; RV32I-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16) + ; RV32I-NEXT: $x10 = COPY [[C]](s32) + ; RV32I-NEXT: $x11 = COPY [[C1]](s32) + ; RV32I-NEXT: $x12 = COPY [[C2]](s32) + ; RV32I-NEXT: $x13 = COPY [[COPY]](s32) + ; RV32I-NEXT: $x14 = COPY [[C3]](s32) + ; RV32I-NEXT: $x15 = COPY [[C4]](s32) + ; RV32I-NEXT: $x16 = COPY [[C5]](s32) + ; RV32I-NEXT: $x17 = COPY [[C6]](s32) + ; RV32I-NEXT: PseudoCALL target-flags(riscv-call) @callee_half_return_stack1, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10 + ; RV32I-NEXT: ADJCALLSTACKUP 4, 0, implicit-def $x2, implicit $x2 + ; RV32I-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32I-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; RV32I-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s16) + ; RV32I-NEXT: $x10 = COPY [[ANYEXT1]](s32) + ; RV32I-NEXT: PseudoRET implicit $x10 + ; + ; RV32IF-LABEL: name: caller_half_return_stack1 + ; RV32IF: bb.1 (%ir-block.0): + ; RV32IF-NEXT: liveins: $x10, $f10_f + ; RV32IF-NEXT: {{ $}} + ; RV32IF-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32IF-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV32IF-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; RV32IF-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32IF-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32IF-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; RV32IF-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; RV32IF-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; RV32IF-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; RV32IF-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32IF-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV32IF-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32IF-NEXT: $x10 = COPY [[C]](s32) + ; RV32IF-NEXT: $x11 = COPY [[C1]](s32) + ; RV32IF-NEXT: $x12 = COPY [[C2]](s32) + ; RV32IF-NEXT: $x13 = COPY [[COPY]](s32) + ; RV32IF-NEXT: $x14 = COPY [[C3]](s32) + ; RV32IF-NEXT: $x15 = COPY [[C4]](s32) + ; RV32IF-NEXT: $x16 = COPY [[C5]](s32) + ; RV32IF-NEXT: $x17 = COPY [[C6]](s32) + ; RV32IF-NEXT: $f10_f = COPY [[ANYEXT]](s32) + ; RV32IF-NEXT: PseudoCALL target-flags(riscv-call) @callee_half_return_stack1, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $f10_f, implicit-def $f10_f + ; RV32IF-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV32IF-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV32IF-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; RV32IF-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s16) + ; RV32IF-NEXT: $f10_f 
= COPY [[ANYEXT1]](s32) + ; RV32IF-NEXT: PseudoRET implicit $f10_f + ; + ; RV32IZFH-LABEL: name: caller_half_return_stack1 + ; RV32IZFH: bb.1 (%ir-block.0): + ; RV32IZFH-NEXT: liveins: $x10, $f10_h + ; RV32IZFH-NEXT: {{ $}} + ; RV32IZFH-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32IZFH-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV32IZFH-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32IZFH-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32IZFH-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; RV32IZFH-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; RV32IZFH-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; RV32IZFH-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; RV32IZFH-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32IZFH-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV32IZFH-NEXT: $x10 = COPY [[C]](s32) + ; RV32IZFH-NEXT: $x11 = COPY [[C1]](s32) + ; RV32IZFH-NEXT: $x12 = COPY [[C2]](s32) + ; RV32IZFH-NEXT: $x13 = COPY [[COPY]](s32) + ; RV32IZFH-NEXT: $x14 = COPY [[C3]](s32) + ; RV32IZFH-NEXT: $x15 = COPY [[C4]](s32) + ; RV32IZFH-NEXT: $x16 = COPY [[C5]](s32) + ; RV32IZFH-NEXT: $x17 = COPY [[C6]](s32) + ; RV32IZFH-NEXT: $f10_h = COPY [[COPY1]](s16) + ; RV32IZFH-NEXT: PseudoCALL target-flags(riscv-call) @callee_half_return_stack1, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $f10_h, implicit-def $f10_h + ; RV32IZFH-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV32IZFH-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV32IZFH-NEXT: $f10_h = COPY [[COPY2]](s16) + ; RV32IZFH-NEXT: PseudoRET implicit $f10_h + %y = call half @callee_half_return_stack1(i32 0, i32 1, i32 2, i32 %v1, i32 5, i32 6, i32 7, i32 8, half %x) + ret half %y +} + +define half @callee_half_return_stack2(half %v1, half %v2, half %v3, half %v4, half %v5, half %v6, half %v7, half %v8, half %x) nounwind { + ; RV32I-LABEL: name: callee_half_return_stack2 + ; RV32I: bb.1 (%ir-block.0): + ; RV32I-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; RV32I-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; RV32I-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12 + ; RV32I-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; RV32I-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13 + ; RV32I-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; RV32I-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $x14 + ; RV32I-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; RV32I-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $x15 + ; RV32I-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; RV32I-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $x16 + ; RV32I-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; RV32I-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $x17 + ; RV32I-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) + ; RV32I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 16) + ; RV32I-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC8]](s16) + ; RV32I-NEXT: $x10 = COPY [[ANYEXT]](s32) + ; RV32I-NEXT: PseudoRET implicit $x10 + ; + ; RV32IF-LABEL: 
name: callee_half_return_stack2 + ; RV32IF: bb.1 (%ir-block.0): + ; RV32IF-NEXT: liveins: $x10, $f10_f, $f11_f, $f12_f, $f13_f, $f14_f, $f15_f, $f16_f, $f17_f + ; RV32IF-NEXT: {{ $}} + ; RV32IF-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV32IF-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; RV32IF-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $f11_f + ; RV32IF-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; RV32IF-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $f12_f + ; RV32IF-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; RV32IF-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $f13_f + ; RV32IF-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; RV32IF-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $f14_f + ; RV32IF-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; RV32IF-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $f15_f + ; RV32IF-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; RV32IF-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $f16_f + ; RV32IF-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; RV32IF-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $f17_f + ; RV32IF-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) + ; RV32IF-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32IF-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; RV32IF-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC8]](s16) + ; RV32IF-NEXT: $f10_f = COPY [[ANYEXT]](s32) + ; RV32IF-NEXT: PseudoRET implicit $f10_f + ; + ; RV32IZFH-LABEL: name: callee_half_return_stack2 + ; RV32IZFH: bb.1 (%ir-block.0): + ; RV32IZFH-NEXT: liveins: $x10, $f10_h, $f11_h, $f12_h, $f13_h, $f14_h, $f15_h, $f16_h, $f17_h + ; RV32IZFH-NEXT: {{ $}} + ; RV32IZFH-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV32IZFH-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f11_h + ; RV32IZFH-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY $f12_h + ; RV32IZFH-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY $f13_h + ; RV32IZFH-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY $f14_h + ; RV32IZFH-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY $f15_h + ; RV32IZFH-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY $f16_h + ; RV32IZFH-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY $f17_h + ; RV32IZFH-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32IZFH-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; RV32IZFH-NEXT: $f10_h = COPY [[TRUNC]](s16) + ; RV32IZFH-NEXT: PseudoRET implicit $f10_h + ret half %x +} + +define half @caller_half_return_stack2(half %x, half %y) nounwind { + ; RV32I-LABEL: name: caller_half_return_stack2 + ; RV32I: bb.1 (%ir-block.0): + ; RV32I-NEXT: liveins: $x10, $x11 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; RV32I-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; RV32I-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 + ; RV32I-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4200 + ; RV32I-NEXT: ADJCALLSTACKDOWN 4, 0, implicit-def $x2, implicit $x2 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32I-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) + ; RV32I-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32I-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[C1]](s16) + ; RV32I-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32I-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s16) + ; RV32I-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s16) + ; RV32I-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT 
[[TRUNC1]](s16) + ; RV32I-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32I-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 + ; RV32I-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C2]](s32) + ; RV32I-NEXT: G_STORE [[ANYEXT8]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16) + ; RV32I-NEXT: $x10 = COPY [[ANYEXT]](s32) + ; RV32I-NEXT: $x11 = COPY [[ANYEXT1]](s32) + ; RV32I-NEXT: $x12 = COPY [[ANYEXT2]](s32) + ; RV32I-NEXT: $x13 = COPY [[ANYEXT3]](s32) + ; RV32I-NEXT: $x14 = COPY [[ANYEXT4]](s32) + ; RV32I-NEXT: $x15 = COPY [[ANYEXT5]](s32) + ; RV32I-NEXT: $x16 = COPY [[ANYEXT6]](s32) + ; RV32I-NEXT: $x17 = COPY [[ANYEXT7]](s32) + ; RV32I-NEXT: PseudoCALL target-flags(riscv-call) @callee_half_return_stack2, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10 + ; RV32I-NEXT: ADJCALLSTACKUP 4, 0, implicit-def $x2, implicit $x2 + ; RV32I-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32I-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; RV32I-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s16) + ; RV32I-NEXT: $x10 = COPY [[ANYEXT9]](s32) + ; RV32I-NEXT: PseudoRET implicit $x10 + ; + ; RV32IF-LABEL: name: caller_half_return_stack2 + ; RV32IF: bb.1 (%ir-block.0): + ; RV32IF-NEXT: liveins: $f10_f, $f11_f + ; RV32IF-NEXT: {{ $}} + ; RV32IF-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV32IF-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; RV32IF-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $f11_f + ; RV32IF-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; RV32IF-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 + ; RV32IF-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4200 + ; RV32IF-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV32IF-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32IF-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) + ; RV32IF-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32IF-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[C1]](s16) + ; RV32IF-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32IF-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s16) + ; RV32IF-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s16) + ; RV32IF-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s16) + ; RV32IF-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV32IF-NEXT: $f10_f = COPY [[ANYEXT]](s32) + ; RV32IF-NEXT: $f11_f = COPY [[ANYEXT1]](s32) + ; RV32IF-NEXT: $f12_f = COPY [[ANYEXT2]](s32) + ; RV32IF-NEXT: $f13_f = COPY [[ANYEXT3]](s32) + ; RV32IF-NEXT: $f14_f = COPY [[ANYEXT4]](s32) + ; RV32IF-NEXT: $f15_f = COPY [[ANYEXT5]](s32) + ; RV32IF-NEXT: $f16_f = COPY [[ANYEXT6]](s32) + ; RV32IF-NEXT: $f17_f = COPY [[ANYEXT7]](s32) + ; RV32IF-NEXT: $x10 = COPY [[ANYEXT8]](s32) + ; RV32IF-NEXT: PseudoCALL target-flags(riscv-call) @callee_half_return_stack2, csr_ilp32f_lp64f, implicit-def $x1, implicit $f10_f, implicit $f11_f, implicit $f12_f, implicit $f13_f, implicit $f14_f, implicit $f15_f, implicit $f16_f, implicit $f17_f, implicit $x10, implicit-def $f10_f + ; RV32IF-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV32IF-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV32IF-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; RV32IF-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s16) + ; RV32IF-NEXT: $f10_f = COPY 
[[ANYEXT9]](s32) + ; RV32IF-NEXT: PseudoRET implicit $f10_f + ; + ; RV32IZFH-LABEL: name: caller_half_return_stack2 + ; RV32IZFH: bb.1 (%ir-block.0): + ; RV32IZFH-NEXT: liveins: $f10_h, $f11_h + ; RV32IZFH-NEXT: {{ $}} + ; RV32IZFH-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV32IZFH-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f11_h + ; RV32IZFH-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 + ; RV32IZFH-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4200 + ; RV32IZFH-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV32IZFH-NEXT: $f10_h = COPY [[COPY]](s16) + ; RV32IZFH-NEXT: $f11_h = COPY [[C]](s16) + ; RV32IZFH-NEXT: $f12_h = COPY [[COPY]](s16) + ; RV32IZFH-NEXT: $f13_h = COPY [[C1]](s16) + ; RV32IZFH-NEXT: $f14_h = COPY [[COPY]](s16) + ; RV32IZFH-NEXT: $f15_h = COPY [[COPY1]](s16) + ; RV32IZFH-NEXT: $f16_h = COPY [[COPY1]](s16) + ; RV32IZFH-NEXT: $f17_h = COPY [[COPY1]](s16) + ; RV32IZFH-NEXT: $x10 = COPY [[COPY]](s16) + ; RV32IZFH-NEXT: PseudoCALL target-flags(riscv-call) @callee_half_return_stack2, csr_ilp32f_lp64f, implicit-def $x1, implicit $f10_h, implicit $f11_h, implicit $f12_h, implicit $f13_h, implicit $f14_h, implicit $f15_h, implicit $f16_h, implicit $f17_h, implicit $x10, implicit-def $f10_h + ; RV32IZFH-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV32IZFH-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV32IZFH-NEXT: $f10_h = COPY [[COPY2]](s16) + ; RV32IZFH-NEXT: PseudoRET implicit $f10_h + %z = call half @callee_half_return_stack2(half %x, half 1.0, half %x, half 3.0, half %x, half %y, half %y, half %y, half %x) + ret half %z +} -- cgit v1.1 From 998c33e5fccd421a910c0a12689b35daf73e1856 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 8 Jun 2024 21:40:29 +0100 Subject: [VPlan] Mark FirstOrderRecurrenceSplice as not having side-effects. Now that FOR exit and resume value creation is explicitly modeled in VPlan (05e1b5340b0caf1, 07b330132c0b) it doesn't depend on the first order recurrence splice being preserved and it can now be marked as not having side-effects. This allows removal of first-order-recurrence-splice if the FOR is only used in the exit or as scalar ph resume value.
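For illustration, a minimal sketch of the splice this allows dropping, assuming a vectorization factor of 4 and hypothetical value names (the mask pairs each lane with its predecessor):

  vector.body:
    %vector.recur = phi <4 x i32> [ %recur.init, %vector.ph ], [ %cur, %vector.body ]
    %cur = add <4 x i32> %wide.load, %splat
    ; Once modeled as side-effect free, this splice is removed when only the
    ; exit or scalar-ph resume values of the FOR are used.
    %splice = shufflevector <4 x i32> %vector.recur, <4 x i32> %cur, <4 x i32> <i32 3, i32 4, i32 5, i32 6>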
--- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 1 + .../LoopVectorize/AArch64/induction-costs.ll | 2 - .../AArch64/loop-vectorization-factors.ll | 2 - .../LoopVectorize/X86/fixed-order-recurrence.ll | 1 - llvm/test/Transforms/LoopVectorize/X86/pr72969.ll | 1 - .../LoopVectorize/first-order-recurrence-chains.ll | 3 -- .../first-order-recurrence-multiply-recurrences.ll | 50 ++++++++++++++++++---- .../LoopVectorize/first-order-recurrence.ll | 18 -------- .../scalable-first-order-recurrence.ll | 1 - 9 files changed, 42 insertions(+), 37 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 738bb67..2093aab 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -138,6 +138,7 @@ bool VPRecipeBase::mayHaveSideEffects() const { case VPInstruction::CalculateTripCountMinusVF: case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::ExtractFromEnd: + case VPInstruction::FirstOrderRecurrenceSplice: case VPInstruction::LogicalAnd: case VPInstruction::PtrAdd: return false; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll index 4b6eb66..56616d4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -120,7 +120,6 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) { ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i32>, ptr [[TMP7]], align 1 ; CHECK-NEXT: [[TMP8:%.*]] = zext <2 x i32> [[WIDE_LOAD]] to <2 x i64> ; CHECK-NEXT: [[TMP9]] = zext <2 x i32> [[WIDE_LOAD4]] to <2 x i64> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[VECTOR_RECUR]], <2 x i64> [[TMP8]], <2 x i32> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -184,7 +183,6 @@ define i64 @int_and_pointer_iv(ptr %start, i32 %N) { ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> ; CHECK-NEXT: [[TMP5]] = zext <4 x i32> [[WIDE_LOAD3]] to <4 x i64> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP4]], <4 x i32> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll index b746303..fae23b6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll @@ -776,7 +776,6 @@ define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP4]], <16 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw <16 x i32> [[TMP4]], ; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i32> 
[[TMP6]] to <16 x i8> ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]] @@ -860,7 +859,6 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[TMP6]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP6]], <16 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i32> [[TMP6]], ; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i32> [[TMP8]] to <16 x i8> ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index 86b3e54..d58b4f5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -234,7 +234,6 @@ define i64 @test_pr62954_scalar_epilogue_required(ptr %A, ptr noalias %B, ptr %C ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP0:%.*]] = sub nsw <16 x i64> zeroinitializer, [[VEC_IND]] ; CHECK-NEXT: [[TMP1]] = sub nsw <16 x i64> zeroinitializer, [[STEP_ADD]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i64> [[VECTOR_RECUR]], <16 x i64> [[TMP0]], <16 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i64> [[TMP1]], i32 15 ; CHECK-NEXT: store i64 [[TMP4]], ptr [[GEP]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll index 829a91f..ec83898 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll @@ -77,7 +77,6 @@ define void @test(ptr %p) { ; VEC-NEXT: store i64 0, ptr [[TMP26]], align 8 ; VEC-NEXT: [[TMP27:%.*]] = add <4 x i16> [[VEC_IND]], ; VEC-NEXT: [[TMP28]] = zext <4 x i16> [[TMP27]] to <4 x i64> -; VEC-NEXT: [[TMP29:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP28]], <4 x i32> ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VEC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], ; VEC-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll index 447f0b0..8dd1e71 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll @@ -560,7 +560,6 @@ define ptr @test_first_order_recurrences_and_pointer_induction1(ptr %ptr) { ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x ptr> [ , %vector.ph ], [ [[TMP0:%.*]], %vector.body ] ; CHECK-NEXT: [[TMP0]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x ptr> [[VECTOR_RECUR]], <4 x ptr> [[TMP0]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i32 0 ; CHECK-NEXT: store <4 x ptr> [[TMP0]], ptr [[TMP4]], align 8 @@ -604,7 +603,6 @@ define ptr @test_first_order_recurrences_and_pointer_induction2(ptr %ptr) { ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x ptr> [ , %vector.ph ], [ 
[[TMP0:%.*]], %vector.body ] ; CHECK-NEXT: [[TMP0]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x ptr> [[VECTOR_RECUR]], <4 x ptr> [[TMP0]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i32 0 ; CHECK-NEXT: store <4 x ptr> [[TMP0]], ptr [[TMP4]], align 8 @@ -653,7 +651,6 @@ define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x double> poison, double [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT3]], <4 x double> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[BROADCAST_SPLAT4]], <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR2]], <4 x double> [[TMP4]], <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP2]], i32 3 ; CHECK-NEXT: store double [[TMP6]], ptr [[P:%.*]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll index 937e01e..7e15f03 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll @@ -220,22 +220,54 @@ exit: define void @test_pr54233_for_depend_on_each_other(ptr noalias %a, ptr noalias %b) { ; CHECK-LABEL: @test_pr54233_for_depend_on_each_other( ; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B:%.*]], align 4 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP4]] = xor <4 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP4]], <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shl <4 x i32> [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i32> [[TMP7]], [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 +; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[TMP10]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP11]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[BROADCAST_SPLAT]], i32 3 +; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[FOR_1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[FOR_2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[OR:%.*]] = or i32 [[FOR_2]], 10 -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[FOR_2]], [[FOR_1]] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SCALAR_RECUR4:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[SCALAR_RECUR4]], 10 +; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[SCALAR_RECUR4]], [[SCALAR_RECUR]] ; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[SHL]], 255 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[XOR]], [[OR]] -; CHECK-NEXT: [[FOR_1_NEXT]] = xor i32 12, [[FOR_2]] -; CHECK-NEXT: [[FOR_2_NEXT]] = load i32, ptr [[B:%.*]], align 4 -; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]] +; CHECK-NEXT: [[FOR_1_NEXT]] = xor i32 12, [[SCALAR_RECUR4]] +; CHECK-NEXT: [[FOR_2_NEXT]] = load i32, ptr [[B]], align 4 +; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: store i32 [[AND]], ptr [[A_GEP]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1000 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 864af18..665ac0e 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -911,7 +911,6 @@ define i32 @PR27246() { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -1005,7 +1004,6 @@ define i32 @PR27246() { ; SINK-AFTER-NEXT: 
[[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[VEC_IND:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VEC_IND]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -1107,10 +1105,6 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP20]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP21]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP22]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> poison, i32 [[TMP27]], i32 0 -; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP28]], i32 1 -; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP29]], i32 2 -; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP30]], i32 3 ; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP23]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP24]], align 4 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = load i32, ptr [[TMP25]], align 4 @@ -1119,7 +1113,6 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> [[TMP39]], i32 [[TMP36]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[VECTOR_RECUR_EXTRACT_FOR_PHI]], i32 2 ; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP41]], i32 [[VECTOR_RECUR_EXTRACT]], i32 3 -; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP34]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP45]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -1225,7 +1218,6 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; SINK-AFTER-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP16]], i32 1 ; SINK-AFTER-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[VECTOR_RECUR_EXTRACT_FOR_PHI]], i32 2 ; SINK-AFTER-NEXT: [[TMP22]] = insertelement <4 x i32> [[TMP21]], i32 [[VECTOR_RECUR_EXTRACT]], i32 3 -; SINK-AFTER-NEXT: [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -1274,7 +1266,6 @@ define i64 @constant_folded_previous_value() { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ , [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> , <4 x i32> ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; 
UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; UNROLL-NO-IC-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -1333,7 +1324,6 @@ define i64 @constant_folded_previous_value() { ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ , [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP0:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> , <4 x i32> ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; SINK-AFTER-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -1390,7 +1380,6 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; UNROLL-NO-IC-NEXT: [[TMP1]] = add <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]] -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP0]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 @@ -1460,7 +1449,6 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[TMP0]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; SINK-AFTER-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP0]], <4 x i32> ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 @@ -2568,7 +2556,6 @@ define void @sink_dead_inst(ptr %a) { ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add <4 x i16> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP5]] = zext <4 x i16> [[TMP3]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR2]], <4 x i32> [[TMP4]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add <4 x i16> [[TMP2]], ; UNROLL-NO-IC-NEXT: [[TMP9]] = add <4 x i16> [[TMP3]], ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP8]], <4 x i32> @@ -2674,7 +2661,6 @@ define void @sink_dead_inst(ptr %a) { ; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 ; SINK-AFTER-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], ; SINK-AFTER-NEXT: [[TMP2]] = zext <4 x i16> [[TMP1]] to <4 x i32> -; SINK-AFTER-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[TMP2]], <4 x i32> ; SINK-AFTER-NEXT: [[TMP4]] = add <4 x i16> [[TMP1]], ; SINK-AFTER-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP4]], <4 x i32> ; SINK-AFTER-NEXT: [[TMP6:%.*]] = sub <4 x i16> [[TMP5]], @@ -3486,7 +3472,6 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = or <4 x i16> [[TMP3]], [[TMP3]] ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP7]] = 
zext <4 x i16> [[TMP5]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP6]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[TMP1]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0 @@ -3585,7 +3570,6 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; SINK-AFTER-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], ; SINK-AFTER-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP1]], [[TMP1]] ; SINK-AFTER-NEXT: [[TMP3]] = zext <4 x i16> [[TMP2]] to <4 x i32> -; SINK-AFTER-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP3]], <4 x i32> ; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[TMP0]] ; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 ; SINK-AFTER-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP6]], align 4 @@ -3659,7 +3643,6 @@ define void @unused_recurrence(ptr %a) { ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add <4 x i16> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add <4 x i16> [[TMP0]], ; UNROLL-NO-IC-NEXT: [[TMP3]] = add <4 x i16> [[TMP1]], -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP2]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 @@ -3729,7 +3712,6 @@ define void @unused_recurrence(ptr %a) { ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[TMP0:%.*]] = add <4 x i16> [[VEC_IND]], ; SINK-AFTER-NEXT: [[TMP1]] = add <4 x i16> [[TMP0]], -; SINK-AFTER-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP1]], <4 x i32> ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], ; SINK-AFTER-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1028 diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll index f9e7c82..b372105 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll @@ -171,7 +171,6 @@ define i64 @constant_folded_previous_value() { ; CHECK-VF4UF2-LABEL: @constant_folded_previous_value ; CHECK-VF4UF2: vector.body ; CHECK-VF4UF2: %[[VECTOR_RECUR:.*]] = phi [ %vector.recur.init, %vector.ph ], [ shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), %vector.body ] -; CHECK-VF4UF2: %[[SPLICE1:.*]] = call @llvm.vector.splice.nxv4i64( %vector.recur, shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), i32 -1) ; CHECK-VF4UF2: br i1 {{.*}}, label %middle.block, label %vector.body entry: br label %scalar.body -- cgit v1.1 From e62c2146aa9a195c219b3585eb36c6987857c1bb Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 8 Jun 2024 14:03:30 -0700 Subject: [ProfileData] Simplify calls to readNext in readBinaryIdsInternal (NFC) (#94862) readNext has two variants: - readNext(ptr) - readNext(ptr, endian) This patch uses the latter to simplify readBinaryIdsInternal. Both forms default to unaligned. 
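For illustration only, a hedged sketch of the two call forms (the template arguments shown here are assumptions; the patch itself reads a uint64_t):

  // Variant 1: endianness fixed at compile time.
  uint64_t A = endian::readNext<uint64_t, llvm::endianness::little>(BI);
  // Variant 2: endianness passed as a runtime value, as used after this patch.
  uint64_t B = endian::readNext<uint64_t>(BI, Endian);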
--- llvm/lib/ProfileData/InstrProfReader.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 27855bf..54a2a61 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -113,12 +113,7 @@ readBinaryIdsInternal(const MemoryBuffer &DataBuffer, instrprof_error::malformed, "not enough data to read binary id length"); - uint64_t BILen = 0; - if (Endian == llvm::endianness::little) - BILen = endian::readNext(BI); - else - BILen = endian::readNext(BI); - + uint64_t BILen = endian::readNext(BI, Endian); if (BILen == 0) return make_error(instrprof_error::malformed, "binary id length is 0"); -- cgit v1.1 From febfbff6cde47b1632424a4c5192a4aae79892c5 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Sun, 9 Jun 2024 00:23:43 +0300 Subject: [mlir][nfc] Sort test passes registration (#94201) --- mlir/tools/mlir-opt/mlir-opt.cpp | 68 ++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 0e8b161..d2ba3d0 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -32,21 +32,21 @@ using namespace mlir; // Defined in the test directory, no public header. namespace mlir { -void registerConvertToTargetEnvPass(); void registerCloneTestPasses(); +void registerConvertToTargetEnvPass(); void registerLazyLoadingTestPasses(); +void registerLoopLikeInterfaceTestPasses(); void registerPassManagerTestPass(); void registerPrintSpirvAvailabilityPass(); -void registerLoopLikeInterfaceTestPasses(); +void registerRegionTestPasses(); void registerShapeFunctionTestPasses(); void registerSideEffectTestPasses(); void registerSliceAnalysisTestPass(); void registerSymbolTestPasses(); -void registerRegionTestPasses(); -void registerTestAffineDataCopyPass(); void registerTestAffineAccessAnalysisPass(); -void registerTestAffineReifyValueBoundsPass(); +void registerTestAffineDataCopyPass(); void registerTestAffineLoopUnswitchingPass(); +void registerTestAffineReifyValueBoundsPass(); void registerTestAffineWalk(); void registerTestBytecodeRoundtripPasses(); void registerTestDecomposeAffineOpPass(); @@ -56,10 +56,10 @@ void registerTestGpuMemoryPromotionPass(); void registerTestLoopPermutationPass(); void registerTestMatchers(); void registerTestOperationEqualPass(); +void registerTestPreserveUseListOrders(); void registerTestPrintDefUsePass(); void registerTestPrintInvalidPass(); void registerTestPrintNestingPass(); -void registerTestPreserveUseListOrders(); void registerTestReducer(); void registerTestSpirvEntryPointABIPass(); void registerTestSpirvModuleCombinerPass(); @@ -68,7 +68,6 @@ void registerTosaTestQuantUtilAPIPass(); void registerVectorizerTestPass(); namespace test { -void registerTestCompositePass(); void registerCommutativityUtils(); void registerConvertCallOpPass(); void registerConvertFuncOpPass(); @@ -77,12 +76,15 @@ void registerMemRefBoundCheck(); void registerPatternsTestPass(); void registerSimpleParametricTilingPass(); void registerTestAffineLoopParametricTilingPass(); -void registerTestArithEmulateWideIntPass(); void registerTestAliasAnalysisPass(); +void registerTestArithEmulateWideIntPass(); void registerTestBuiltinAttributeInterfaces(); void registerTestBuiltinDistinctAttributes(); void registerTestCallGraphPass(); void registerTestCfAssertPass(); +void registerTestCFGLoopInfoPass(); +void 
registerTestComposeSubView(); +void registerTestCompositePass(); void registerTestConstantFold(); void registerTestControlFlowSink(); void registerTestDataLayoutPropagation(); @@ -95,12 +97,10 @@ void registerTestDynamicPipelinePass(); void registerTestEmulateNarrowTypePass(); void registerTestExpandMathPass(); void registerTestFooAnalysisPass(); -void registerTestComposeSubView(); -void registerTestMultiBuffering(); -void registerTestIntRangeInference(); -void registerTestIRVisitorsPass(); void registerTestGenericIRVisitorsPass(); void registerTestInterfaces(); +void registerTestIntRangeInference(); +void registerTestIRVisitorsPass(); void registerTestLastModifiedPass(); void registerTestLinalgDecomposeOps(); void registerTestLinalgDropUnitDims(); @@ -110,7 +110,6 @@ void registerTestLinalgTransforms(); void registerTestLivenessAnalysisPass(); void registerTestLivenessPass(); void registerTestLoopFusion(); -void registerTestCFGLoopInfoPass(); void registerTestLoopMappingPass(); void registerTestLoopUnrollingPass(); void registerTestLowerToArmNeon(); @@ -123,12 +122,14 @@ void registerTestMathPolynomialApproximationPass(); void registerTestMathToVCIXPass(); void registerTestMemRefDependenceCheck(); void registerTestMemRefStrideCalculation(); -void registerTestMeshSimplificationsPass(); void registerTestMeshReshardingSpmdizationPass(); -void registerTestOpLoweringPasses(); +void registerTestMeshSimplificationsPass(); +void registerTestMultiBuffering(); void registerTestNextAccessPass(); +void registerTestNVGPULowerings(); void registerTestOneToNTypeConversionPass(); void registerTestOpaqueLoc(); +void registerTestOpLoweringPasses(); void registerTestPadFusion(); void registerTestRecursiveTypesPass(); void registerTestSCFUpliftWhileToFor(); @@ -141,10 +142,9 @@ void registerTestTensorCopyInsertionPass(); void registerTestTensorTransforms(); void registerTestTopologicalSortAnalysisPass(); void registerTestTransformDialectEraseSchedulePass(); -void registerTestWrittenToPass(); void registerTestVectorLowerings(); void registerTestVectorReductionToSPIRVDotProd(); -void registerTestNVGPULowerings(); +void registerTestWrittenToPass(); #if MLIR_ENABLE_PDL_IN_PATTERNMATCH void registerTestDialectConversionPasses(); void registerTestPDLByteCodePass(); @@ -164,17 +164,17 @@ void registerTestTransformDialectExtension(DialectRegistry &); void registerTestPasses() { registerCloneTestPasses(); registerConvertToTargetEnvPass(); - registerPassManagerTestPass(); - registerPrintSpirvAvailabilityPass(); registerLazyLoadingTestPasses(); registerLoopLikeInterfaceTestPasses(); + registerPassManagerTestPass(); + registerPrintSpirvAvailabilityPass(); + registerRegionTestPasses(); registerShapeFunctionTestPasses(); registerSideEffectTestPasses(); registerSliceAnalysisTestPass(); registerSymbolTestPasses(); - registerRegionTestPasses(); - registerTestAffineDataCopyPass(); registerTestAffineAccessAnalysisPass(); + registerTestAffineDataCopyPass(); registerTestAffineLoopUnswitchingPass(); registerTestAffineReifyValueBoundsPass(); registerTestAffineWalk(); @@ -186,18 +186,17 @@ void registerTestPasses() { registerTestLoopPermutationPass(); registerTestMatchers(); registerTestOperationEqualPass(); + registerTestPreserveUseListOrders(); registerTestPrintDefUsePass(); registerTestPrintInvalidPass(); registerTestPrintNestingPass(); - registerTestPreserveUseListOrders(); registerTestReducer(); registerTestSpirvEntryPointABIPass(); registerTestSpirvModuleCombinerPass(); registerTestTraitsPass(); - 
registerVectorizerTestPass(); registerTosaTestQuantUtilAPIPass(); + registerVectorizerTestPass(); - mlir::test::registerTestCompositePass(); mlir::test::registerCommutativityUtils(); mlir::test::registerConvertCallOpPass(); mlir::test::registerConvertFuncOpPass(); @@ -212,24 +211,25 @@ void registerTestPasses() { mlir::test::registerTestBuiltinDistinctAttributes(); mlir::test::registerTestCallGraphPass(); mlir::test::registerTestCfAssertPass(); + mlir::test::registerTestCFGLoopInfoPass(); + mlir::test::registerTestComposeSubView(); + mlir::test::registerTestCompositePass(); mlir::test::registerTestConstantFold(); mlir::test::registerTestControlFlowSink(); - mlir::test::registerTestDiagnosticsPass(); - mlir::test::registerTestDecomposeCallGraphTypes(); mlir::test::registerTestDataLayoutPropagation(); mlir::test::registerTestDataLayoutQuery(); mlir::test::registerTestDeadCodeAnalysisPass(); + mlir::test::registerTestDecomposeCallGraphTypes(); + mlir::test::registerTestDiagnosticsPass(); mlir::test::registerTestDominancePass(); mlir::test::registerTestDynamicPipelinePass(); mlir::test::registerTestEmulateNarrowTypePass(); mlir::test::registerTestExpandMathPass(); mlir::test::registerTestFooAnalysisPass(); - mlir::test::registerTestComposeSubView(); - mlir::test::registerTestMultiBuffering(); - mlir::test::registerTestIntRangeInference(); - mlir::test::registerTestIRVisitorsPass(); mlir::test::registerTestGenericIRVisitorsPass(); mlir::test::registerTestInterfaces(); + mlir::test::registerTestIntRangeInference(); + mlir::test::registerTestIRVisitorsPass(); mlir::test::registerTestLastModifiedPass(); mlir::test::registerTestLinalgDecomposeOps(); mlir::test::registerTestLinalgDropUnitDims(); @@ -239,7 +239,6 @@ void registerTestPasses() { mlir::test::registerTestLivenessAnalysisPass(); mlir::test::registerTestLivenessPass(); mlir::test::registerTestLoopFusion(); - mlir::test::registerTestCFGLoopInfoPass(); mlir::test::registerTestLoopMappingPass(); mlir::test::registerTestLoopUnrollingPass(); mlir::test::registerTestLowerToArmNeon(); @@ -252,12 +251,14 @@ void registerTestPasses() { mlir::test::registerTestMathToVCIXPass(); mlir::test::registerTestMemRefDependenceCheck(); mlir::test::registerTestMemRefStrideCalculation(); - mlir::test::registerTestOpLoweringPasses(); - mlir::test::registerTestMeshSimplificationsPass(); mlir::test::registerTestMeshReshardingSpmdizationPass(); + mlir::test::registerTestMeshSimplificationsPass(); + mlir::test::registerTestMultiBuffering(); mlir::test::registerTestNextAccessPass(); + mlir::test::registerTestNVGPULowerings(); mlir::test::registerTestOneToNTypeConversionPass(); mlir::test::registerTestOpaqueLoc(); + mlir::test::registerTestOpLoweringPasses(); mlir::test::registerTestPadFusion(); mlir::test::registerTestRecursiveTypesPass(); mlir::test::registerTestSCFUpliftWhileToFor(); @@ -272,7 +273,6 @@ void registerTestPasses() { mlir::test::registerTestTransformDialectEraseSchedulePass(); mlir::test::registerTestVectorLowerings(); mlir::test::registerTestVectorReductionToSPIRVDotProd(); - mlir::test::registerTestNVGPULowerings(); mlir::test::registerTestWrittenToPass(); #if MLIR_ENABLE_PDL_IN_PATTERNMATCH mlir::test::registerTestDialectConversionPasses(); -- cgit v1.1 From c2d68c42a44853dccf8df736ed9be85a8be3ef70 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Sat, 8 Jun 2024 15:16:37 -0500 Subject: [InstCombine] Add tests for propagating flags when folding consecutive shifts; NFC --- llvm/test/Transforms/InstCombine/shift.ll | 80
+++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll index 8da52e0..8726747 100644 --- a/llvm/test/Transforms/InstCombine/shift.ll +++ b/llvm/test/Transforms/InstCombine/shift.ll @@ -2240,4 +2240,84 @@ define i129 @shift_zext_not_nneg(i8 %arg) { ret i129 %shl } +define i8 @src_shl_nsw(i8 %x) { +; CHECK-LABEL: @src_shl_nsw( +; CHECK-NEXT: [[R:%.*]] = shl i8 32, [[X:%.*]] +; CHECK-NEXT: ret i8 [[R]] +; + %sh = shl nsw i8 1, %x + %r = shl nsw i8 %sh, 5 + ret i8 %r +} + +define i8 @src_shl_nsw_fail(i8 %x) { +; CHECK-LABEL: @src_shl_nsw_fail( +; CHECK-NEXT: [[R:%.*]] = shl i8 32, [[X:%.*]] +; CHECK-NEXT: ret i8 [[R]] +; + %sh = shl nsw i8 1, %x + %r = shl i8 %sh, 5 + ret i8 %r +} + +define i8 @src_shl_nuw(i8 %x) { +; CHECK-LABEL: @src_shl_nuw( +; CHECK-NEXT: [[R:%.*]] = shl i8 12, [[X:%.*]] +; CHECK-NEXT: ret i8 [[R]] +; + %sh = shl nuw i8 3, %x + %r = shl nuw i8 %sh, 2 + ret i8 %r +} + +define i8 @src_shl_nuw_fail(i8 %x) { +; CHECK-LABEL: @src_shl_nuw_fail( +; CHECK-NEXT: [[R:%.*]] = shl i8 12, [[X:%.*]] +; CHECK-NEXT: ret i8 [[R]] +; + %sh = shl i8 3, %x + %r = shl nuw i8 %sh, 2 + ret i8 %r +} + +define i8 @src_lshr_exact(i8 %x) { +; CHECK-LABEL: @src_lshr_exact( +; CHECK-NEXT: [[R:%.*]] = lshr i8 48, [[X:%.*]] +; CHECK-NEXT: ret i8 [[R]] +; + %sh = lshr exact i8 96, %x + %r = lshr exact i8 %sh, 1 + ret i8 %r +} + +define i8 @src_lshr_exact_fail(i8 %x) { +; CHECK-LABEL: @src_lshr_exact_fail( +; CHECK-NEXT: [[R:%.*]] = lshr i8 48, [[X:%.*]] +; CHECK-NEXT: ret i8 [[R]] +; + %sh = lshr exact i8 96, %x + %r = lshr i8 %sh, 1 + ret i8 %r +} + +define i8 @src_ashr_exact(i8 %x) { +; CHECK-LABEL: @src_ashr_exact( +; CHECK-NEXT: [[R:%.*]] = ashr i8 -8, [[X:%.*]] +; CHECK-NEXT: ret i8 [[R]] +; + %sh = ashr exact i8 -32, %x + %r = ashr exact i8 %sh, 2 + ret i8 %r +} + +define i8 @src_ashr_exact_fail(i8 %x) { +; CHECK-LABEL: @src_ashr_exact_fail( +; CHECK-NEXT: [[R:%.*]] = ashr i8 -8, [[X:%.*]] +; CHECK-NEXT: ret i8 [[R]] +; + %sh = ashr i8 -32, %x + %r = ashr exact i8 %sh, 2 + ret i8 %r +} + declare i16 @llvm.umax.i16(i16, i16) -- cgit v1.1 From 2900d035f45fa04078ce9b1ec1e980b113f16013 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Sat, 8 Jun 2024 15:16:41 -0500 Subject: [InstCombine] Propagate flags when folding consecutive shifts When we fold `(shift (shift C0, x), C1)` we can propagate flags that are common to both shifts.
Proofs: https://alive2.llvm.org/ce/z/LkEzXD Closes #94872 --- llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp | 15 ++++++++++++--- llvm/test/Transforms/InstCombine/shift.ll | 8 ++++---- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 9ff817d..4a014ab 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -767,11 +767,20 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *C1, // (C2 >> X) >> C1 --> (C2 >> C1) >> X Constant *C2; Value *X; - if (match(Op0, m_BinOp(I.getOpcode(), m_ImmConstant(C2), m_Value(X)))) - return BinaryOperator::Create( + bool IsLeftShift = I.getOpcode() == Instruction::Shl; + if (match(Op0, m_BinOp(I.getOpcode(), m_ImmConstant(C2), m_Value(X)))) { + Instruction *R = BinaryOperator::Create( I.getOpcode(), Builder.CreateBinOp(I.getOpcode(), C2, C1), X); + BinaryOperator *BO0 = cast(Op0); + if (IsLeftShift) { + R->setHasNoUnsignedWrap(I.hasNoUnsignedWrap() && + BO0->hasNoUnsignedWrap()); + R->setHasNoSignedWrap(I.hasNoSignedWrap() && BO0->hasNoSignedWrap()); + } else + R->setIsExact(I.isExact() && BO0->isExact()); + return R; + } - bool IsLeftShift = I.getOpcode() == Instruction::Shl; Type *Ty = I.getType(); unsigned TypeBits = Ty->getScalarSizeInBits(); diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll index 8726747..0700d7a 100644 --- a/llvm/test/Transforms/InstCombine/shift.ll +++ b/llvm/test/Transforms/InstCombine/shift.ll @@ -2242,7 +2242,7 @@ define i129 @shift_zext_not_nneg(i8 %arg) { define i8 @src_shl_nsw(i8 %x) { ; CHECK-LABEL: @src_shl_nsw( -; CHECK-NEXT: [[R:%.*]] = shl i8 32, [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl nsw i8 32, [[X:%.*]] ; CHECK-NEXT: ret i8 [[R]] ; %sh = shl nsw i8 1, %x @@ -2262,7 +2262,7 @@ define i8 @src_shl_nsw_fail(i8 %x) { define i8 @src_shl_nuw(i8 %x) { ; CHECK-LABEL: @src_shl_nuw( -; CHECK-NEXT: [[R:%.*]] = shl i8 12, [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl nuw i8 12, [[X:%.*]] ; CHECK-NEXT: ret i8 [[R]] ; %sh = shl nuw i8 3, %x @@ -2282,7 +2282,7 @@ define i8 @src_shl_nuw_fail(i8 %x) { define i8 @src_lshr_exact(i8 %x) { ; CHECK-LABEL: @src_lshr_exact( -; CHECK-NEXT: [[R:%.*]] = lshr i8 48, [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = lshr exact i8 48, [[X:%.*]] ; CHECK-NEXT: ret i8 [[R]] ; %sh = lshr exact i8 96, %x @@ -2302,7 +2302,7 @@ define i8 @src_lshr_exact_fail(i8 %x) { define i8 @src_ashr_exact(i8 %x) { ; CHECK-LABEL: @src_ashr_exact( -; CHECK-NEXT: [[R:%.*]] = ashr i8 -8, [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = ashr exact i8 -8, [[X:%.*]] ; CHECK-NEXT: ret i8 [[R]] ; %sh = ashr exact i8 -32, %x -- cgit v1.1 From 2e482b25329433a61fee2e22f4ea00775e7e7ec7 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Sun, 9 Jun 2024 01:00:17 +0300 Subject: [clang][NFC] Update CWG issues list --- clang/www/cxx_dr_status.html | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index 4385744..5e2ab06 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -17186,6 +17186,18 @@ objects open Template argument deduction involving exception specifications Not resolved + + + 2897 + open + Copying potentially-overlapping union subobjects + Not resolved + + + 2898 + open + Clarify implicit conversion sequence from cv T to T + Not resolved -- cgit v1.1 From 
dcb71c06c7b059e313f22e46bc9c41343a03f1eb Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 8 Jun 2024 15:51:44 -0700 Subject: [MC] Simplify Sec.getFragmentList().insert(Sec.begin(), F). NFC Decrease the uses of getFragmentList() to make it easier to change the fragment list representation. --- llvm/include/llvm/MC/MCSection.h | 2 ++ llvm/lib/MC/MCContext.cpp | 10 +++++----- llvm/lib/MC/MCFragment.cpp | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h index 90bc48e..ad41d44 100644 --- a/llvm/include/llvm/MC/MCSection.h +++ b/llvm/include/llvm/MC/MCSection.h @@ -188,6 +188,8 @@ public: iterator end() { return Fragments.end(); } const_iterator end() const { return Fragments.end(); } + void addFragment(MCFragment &F) { Fragments.push_back(&F); } + MCSection::iterator getSubsectionInsertionPoint(unsigned Subsection); void dump() const; diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp index f027eb6..771ca9c 100644 --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -498,7 +498,7 @@ MCSectionELF *MCContext::createELFSectionImpl(StringRef Section, unsigned Type, R, LinkedToSym); auto *F = new MCDataFragment(); - Ret->getFragmentList().insert(Ret->begin(), F); + Ret->addFragment(*F); F->setParent(Ret); R->setFragment(F); @@ -772,7 +772,7 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind Kind, Entry.second = Result; auto *F = new MCDataFragment(); - Result->getFragmentList().insert(Result->begin(), F); + Result->addFragment(*F); F->setParent(Result); Begin->setFragment(F); @@ -838,7 +838,7 @@ MCSectionXCOFF *MCContext::getXCOFFSection( Entry.second = Result; auto *F = new MCDataFragment(); - Result->getFragmentList().insert(Result->begin(), F); + Result->addFragment(*F); F->setParent(Result); if (Begin) @@ -861,7 +861,7 @@ MCSectionSPIRV *MCContext::getSPIRVSection() { MCSectionSPIRV(SectionKind::getText(), Begin); auto *F = new MCDataFragment(); - Result->getFragmentList().insert(Result->begin(), F); + Result->addFragment(*F); F->setParent(Result); return Result; @@ -884,7 +884,7 @@ MCSectionDXContainer *MCContext::getDXContainerSection(StringRef Section, // The first fragment will store the header auto *F = new MCDataFragment(); - MapIt->second->getFragmentList().insert(MapIt->second->begin(), F); + MapIt->second->addFragment(*F); F->setParent(MapIt->second); return MapIt->second; diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp index a8da46d..7d08268 100644 --- a/llvm/lib/MC/MCFragment.cpp +++ b/llvm/lib/MC/MCFragment.cpp @@ -260,7 +260,7 @@ MCFragment::MCFragment(FragmentType Kind, bool HasInstructions, : Parent(Parent), Atom(nullptr), Offset(~UINT64_C(0)), LayoutOrder(0), Kind(Kind), IsBeingLaidOut(false), HasInstructions(HasInstructions) { if (Parent && !isa(*this)) - Parent->getFragmentList().push_back(this); + Parent->addFragment(*this); } void MCFragment::destroy() { -- cgit v1.1 From f20d8b9dcb07188813f52b9be308325f74154196 Mon Sep 17 00:00:00 2001 From: Koakuma Date: Sun, 9 Jun 2024 09:58:46 +0700 Subject: [SPARC][IAS] Add GNU extension for `addc` Transform `addc imm, %rs, %rd` into `addc %rs, imm, %rd`. This is used in some GNU and Linux code. 
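For example, with this alias both operand orders below assemble to the same encoding (a minimal sketch; the second form is the canonical one):

  addc 1, %g2, %g3    ! GNU operand order, accepted via the new alias
  addc %g2, 1, %g3    ! canonical operand order

Both spellings are exercised by the test update below.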
Reviewers: s-barannikov, rorth, jrtc27, brad0 Reviewed By: s-barannikov Pull Request: https://github.com/llvm/llvm-project/pull/94245 --- llvm/lib/Target/Sparc/SparcInstrAliases.td | 11 ++++++++--- llvm/test/MC/Sparc/sparcv9-instructions.s | 10 ++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/Sparc/SparcInstrAliases.td b/llvm/lib/Target/Sparc/SparcInstrAliases.td index db4c05c..2b92445 100644 --- a/llvm/lib/Target/Sparc/SparcInstrAliases.td +++ b/llvm/lib/Target/Sparc/SparcInstrAliases.td @@ -560,11 +560,16 @@ def : InstAlias<"mov $simm13, %tbr", (WRTBRri G0, simm13Op:$simm13), 0>; // End of Section A.3 -// or imm, reg, rd -> or reg, imm, rd -// Nonstandard GNU extension. -let EmitPriority = 0 in + +// Nonstandard GNU extensions. +let EmitPriority = 0 in { + // or imm, reg, rd -> or reg, imm, rd def : InstAlias<"or $simm13, $rs1, $rd", (ORri IntRegs:$rd, IntRegs:$rs1, simm13Op:$simm13)>; + // addc/addx imm, reg, rd -> addc/addx reg, imm, rd + def : InstAlias<"addx $simm13, $rs1, $rd", (ADDCri IntRegs:$rd, IntRegs:$rs1, simm13Op:$simm13)>; +} + // wr reg_or_imm, specialreg -> wr %g0, reg_or_imm, specialreg // (aka: omit the first arg when it's g0. This is not in the manual, but is // supported by gnu and solaris as) diff --git a/llvm/test/MC/Sparc/sparcv9-instructions.s b/llvm/test/MC/Sparc/sparcv9-instructions.s index 0ca2e50..b947243 100644 --- a/llvm/test/MC/Sparc/sparcv9-instructions.s +++ b/llvm/test/MC/Sparc/sparcv9-instructions.s @@ -7,6 +7,16 @@ addc %g2, %g1, %g3 ! V8: error: invalid instruction mnemonic + ! V8-NEXT: addc %g2, 1, %g3 + ! V9: addx %g2, 1, %g3 ! encoding: [0x86,0x40,0xa0,0x01] + addc %g2, 1, %g3 + + ! V8: error: invalid instruction mnemonic + ! V8-NEXT: addc 1, %g2, %g3 + ! V9: addx %g2, 1, %g3 ! encoding: [0x86,0x40,0xa0,0x01] + addc 1, %g2, %g3 + + ! V8: error: invalid instruction mnemonic ! V8-NEXT: addccc %g1, %g2, %g3 ! V9: addxcc %g1, %g2, %g3 ! encoding: [0x86,0xc0,0x40,0x02] addccc %g1, %g2, %g3 -- cgit v1.1 From 44f93578d2508d7d67f7ca3bd1edf2393c19d11c Mon Sep 17 00:00:00 2001 From: Koakuma Date: Sun, 9 Jun 2024 10:09:51 +0700 Subject: [SPARC][IAS] Add support for %uhi and %ulo extensions This adds support for GNU %uhi and %ulo extensions. Those resolve to the same relocations as %hh and %hm. 
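For example, under this change the following pairs are equivalent (a sketch based on the test update below):

  sethi %uhi(sym), %l0      ! same fixup/relocation as: sethi %hh(sym), %l0 (R_SPARC_HH22)
  or %g1, %ulo(sym), %g3    ! same fixup/relocation as: or %g1, %hm(sym), %g3 (R_SPARC_HM10)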
Reviewers: cyndyishida, dcci, brad0, jrtc27, aaupov, Endilll, rorth, maksfb, #reviewers-libcxxabi, s-barannikov, rafaelauler, ayermolo, #reviewers-libunwind, #reviewers-libcxx, daniel-grumberg, tbaederr Reviewed By: s-barannikov Pull Request: https://github.com/llvm/llvm-project/pull/94246 --- llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp | 78 +++++++++++----------- llvm/test/MC/Sparc/sparc-relocations.s | 10 +++ 2 files changed, 50 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp index 522a887..4688837 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp @@ -93,44 +93,46 @@ bool SparcMCExpr::printVariantKind(raw_ostream &OS, VariantKind Kind) SparcMCExpr::VariantKind SparcMCExpr::parseVariantKind(StringRef name) { return StringSwitch(name) - .Case("lo", VK_Sparc_LO) - .Case("hi", VK_Sparc_HI) - .Case("h44", VK_Sparc_H44) - .Case("m44", VK_Sparc_M44) - .Case("l44", VK_Sparc_L44) - .Case("hh", VK_Sparc_HH) - .Case("hm", VK_Sparc_HM) - .Case("lm", VK_Sparc_LM) - .Case("pc22", VK_Sparc_PC22) - .Case("pc10", VK_Sparc_PC10) - .Case("got22", VK_Sparc_GOT22) - .Case("got10", VK_Sparc_GOT10) - .Case("got13", VK_Sparc_GOT13) - .Case("r_disp32", VK_Sparc_R_DISP32) - .Case("tgd_hi22", VK_Sparc_TLS_GD_HI22) - .Case("tgd_lo10", VK_Sparc_TLS_GD_LO10) - .Case("tgd_add", VK_Sparc_TLS_GD_ADD) - .Case("tgd_call", VK_Sparc_TLS_GD_CALL) - .Case("tldm_hi22", VK_Sparc_TLS_LDM_HI22) - .Case("tldm_lo10", VK_Sparc_TLS_LDM_LO10) - .Case("tldm_add", VK_Sparc_TLS_LDM_ADD) - .Case("tldm_call", VK_Sparc_TLS_LDM_CALL) - .Case("tldo_hix22", VK_Sparc_TLS_LDO_HIX22) - .Case("tldo_lox10", VK_Sparc_TLS_LDO_LOX10) - .Case("tldo_add", VK_Sparc_TLS_LDO_ADD) - .Case("tie_hi22", VK_Sparc_TLS_IE_HI22) - .Case("tie_lo10", VK_Sparc_TLS_IE_LO10) - .Case("tie_ld", VK_Sparc_TLS_IE_LD) - .Case("tie_ldx", VK_Sparc_TLS_IE_LDX) - .Case("tie_add", VK_Sparc_TLS_IE_ADD) - .Case("tle_hix22", VK_Sparc_TLS_LE_HIX22) - .Case("tle_lox10", VK_Sparc_TLS_LE_LOX10) - .Case("hix", VK_Sparc_HIX22) - .Case("lox", VK_Sparc_LOX10) - .Case("gdop_hix22", VK_Sparc_GOTDATA_HIX22) - .Case("gdop_lox10", VK_Sparc_GOTDATA_LOX10) - .Case("gdop", VK_Sparc_GOTDATA_OP) - .Default(VK_Sparc_None); + .Case("lo", VK_Sparc_LO) + .Case("hi", VK_Sparc_HI) + .Case("h44", VK_Sparc_H44) + .Case("m44", VK_Sparc_M44) + .Case("l44", VK_Sparc_L44) + .Case("hh", VK_Sparc_HH) + .Case("uhi", VK_Sparc_HH) // Nonstandard GNU extension + .Case("hm", VK_Sparc_HM) + .Case("ulo", VK_Sparc_HM) // Nonstandard GNU extension + .Case("lm", VK_Sparc_LM) + .Case("pc22", VK_Sparc_PC22) + .Case("pc10", VK_Sparc_PC10) + .Case("got22", VK_Sparc_GOT22) + .Case("got10", VK_Sparc_GOT10) + .Case("got13", VK_Sparc_GOT13) + .Case("r_disp32", VK_Sparc_R_DISP32) + .Case("tgd_hi22", VK_Sparc_TLS_GD_HI22) + .Case("tgd_lo10", VK_Sparc_TLS_GD_LO10) + .Case("tgd_add", VK_Sparc_TLS_GD_ADD) + .Case("tgd_call", VK_Sparc_TLS_GD_CALL) + .Case("tldm_hi22", VK_Sparc_TLS_LDM_HI22) + .Case("tldm_lo10", VK_Sparc_TLS_LDM_LO10) + .Case("tldm_add", VK_Sparc_TLS_LDM_ADD) + .Case("tldm_call", VK_Sparc_TLS_LDM_CALL) + .Case("tldo_hix22", VK_Sparc_TLS_LDO_HIX22) + .Case("tldo_lox10", VK_Sparc_TLS_LDO_LOX10) + .Case("tldo_add", VK_Sparc_TLS_LDO_ADD) + .Case("tie_hi22", VK_Sparc_TLS_IE_HI22) + .Case("tie_lo10", VK_Sparc_TLS_IE_LO10) + .Case("tie_ld", VK_Sparc_TLS_IE_LD) + .Case("tie_ldx", VK_Sparc_TLS_IE_LDX) + .Case("tie_add", VK_Sparc_TLS_IE_ADD) + 
.Case("tle_hix22", VK_Sparc_TLS_LE_HIX22) + .Case("tle_lox10", VK_Sparc_TLS_LE_LOX10) + .Case("hix", VK_Sparc_HIX22) + .Case("lox", VK_Sparc_LOX10) + .Case("gdop_hix22", VK_Sparc_GOTDATA_HIX22) + .Case("gdop_lox10", VK_Sparc_GOTDATA_LOX10) + .Case("gdop", VK_Sparc_GOTDATA_OP) + .Default(VK_Sparc_None); } Sparc::Fixups SparcMCExpr::getFixupKind(SparcMCExpr::VariantKind Kind) { diff --git a/llvm/test/MC/Sparc/sparc-relocations.s b/llvm/test/MC/Sparc/sparc-relocations.s index d99ddb7..82314e4 100644 --- a/llvm/test/MC/Sparc/sparc-relocations.s +++ b/llvm/test/MC/Sparc/sparc-relocations.s @@ -10,6 +10,8 @@ ! CHECK-OBJ-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_M44 sym ! CHECK-OBJ-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_L44 sym ! CHECK-OBJ-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_HH22 sym + ! CHECK-OBJ-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_HH22 sym + ! CHECK-OBJ-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_HM10 sym ! CHECK-OBJ-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_HM10 sym ! CHECK-OBJ-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_LM22 sym ! CHECK-OBJ-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_13 sym @@ -49,10 +51,18 @@ ! CHECK-NEXT: ! fixup A - offset: 0, value: %hh(sym), kind: fixup_sparc_hh sethi %hh(sym), %l0 + ! CHECK: sethi %hh(sym), %l0 ! encoding: [0x21,0b00AAAAAA,A,A] + ! CHECK-NEXT: ! fixup A - offset: 0, value: %hh(sym), kind: fixup_sparc_hh + sethi %uhi(sym), %l0 + ! CHECK: or %g1, %hm(sym), %g3 ! encoding: [0x86,0x10,0b011000AA,A] ! CHECK-NEXT: ! fixup A - offset: 0, value: %hm(sym), kind: fixup_sparc_hm or %g1, %hm(sym), %g3 + ! CHECK: or %g1, %hm(sym), %g3 ! encoding: [0x86,0x10,0b011000AA,A] + ! CHECK-NEXT: ! fixup A - offset: 0, value: %hm(sym), kind: fixup_sparc_hm + or %g1, %ulo(sym), %g3 + ! CHECK: sethi %lm(sym), %l0 ! encoding: [0x21,0b00AAAAAA,A,A] ! CHECK-NEXT: ! fixup A - offset: 0, value: %lm(sym), kind: fixup_sparc_lm sethi %lm(sym), %l0 -- cgit v1.1 From 715a5d8d93a8383e50e9400303313288c0e5b0de Mon Sep 17 00:00:00 2001 From: Koakuma Date: Sun, 9 Jun 2024 10:14:50 +0700 Subject: [SPARC][IAS] Add aliases for %asr20-21 as defined in JPS1 This adds %set_softint and %clear_softint aliases for %asr20 and %asr21 as defined in JPS1. 
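For example, the new names can be written wherever the numbered ASRs were accepted (a sketch mirroring the test update below):

  wr %i0, 1, %set_softint      ! same as: wr %i0, 1, %asr20
  wr %i0, 1, %clear_softint    ! same as: wr %i0, 1, %asr21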
Reviewers: jrtc27, brad0, s-barannikov, rorth Reviewed By: s-barannikov Pull Request: https://github.com/llvm/llvm-project/pull/94247 --- llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 11 +++++++++-- llvm/test/MC/Sparc/sparcv9-instructions.s | 4 ++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index 185b2fe..e4f5c64 100644 --- a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -1384,12 +1384,11 @@ MCRegister SparcAsmParser::matchRegisterName(const AsmToken &Tok, } // JPS1 extension - aliases for ASRs - // Section A.51 - Read State Register + // Section 5.2.11 - Ancillary State Registers (ASRs) if (Name == "pcr") { RegKind = SparcOperand::rk_Special; return SP::ASR16; } - if (Name == "pic") { RegKind = SparcOperand::rk_Special; return SP::ASR17; @@ -1402,6 +1401,14 @@ MCRegister SparcAsmParser::matchRegisterName(const AsmToken &Tok, RegKind = SparcOperand::rk_Special; return SP::ASR19; } + if (Name == "set_softint") { + RegKind = SparcOperand::rk_Special; + return SP::ASR20; + } + if (Name == "clear_softint") { + RegKind = SparcOperand::rk_Special; + return SP::ASR21; + } if (Name == "softint") { RegKind = SparcOperand::rk_Special; return SP::ASR22; diff --git a/llvm/test/MC/Sparc/sparcv9-instructions.s b/llvm/test/MC/Sparc/sparcv9-instructions.s index b947243..d461c82 100644 --- a/llvm/test/MC/Sparc/sparcv9-instructions.s +++ b/llvm/test/MC/Sparc/sparcv9-instructions.s @@ -502,6 +502,10 @@ wr %i0, %i1, %ccr ! V9: wr %i0, 1, %ccr ! encoding: [0x85,0x86,0x20,0x01] wr %i0, 1, %ccr + ! V9: wr %i0, 1, %asr20 ! encoding: [0xa9,0x86,0x20,0x01] + wr %i0, 1, %set_softint + ! V9: wr %i0, 1, %asr21 ! encoding: [0xab,0x86,0x20,0x01] + wr %i0, 1, %clear_softint ! V9: st %o1, [%o0] ! encoding: [0xd2,0x22,0x00,0x00] stw %o1, [%o0] -- cgit v1.1 From cc8fa1e9206aa69197c891ca2f17b64340c5a6aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Sat, 8 Jun 2024 11:18:33 +0200 Subject: [clang][Interp][NFC] Refactor lvalue-to-rvalue conversion code Really perform the conversion always if the flag is set and don't make it dependent on whether we're checking the result for initialization. 
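A sketch of the resulting control flow in emitRet<PT_Ptr> (using the names from the diff below; not a verbatim quote): the initialization check now runs first and unconditionally, and the conversion depends only on the flag.

  if (CheckFullyInitialized && !EvalResult.checkFullyInitialized(S, Ptr))
    return false;
  if (ConvertResultToRValue) {
    // ... convert via Ptr.toRValue(Ctx) ...
  } else {
    EvalResult.setValue(Ptr.toAPValue());
  }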
--- clang/lib/AST/Interp/EvalEmitter.cpp | 30 ++++++++++++++---------------- clang/lib/AST/Interp/EvaluationResult.cpp | 9 ++++++--- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/clang/lib/AST/Interp/EvalEmitter.cpp b/clang/lib/AST/Interp/EvalEmitter.cpp index f6191d8..025b46b 100644 --- a/clang/lib/AST/Interp/EvalEmitter.cpp +++ b/clang/lib/AST/Interp/EvalEmitter.cpp @@ -40,7 +40,7 @@ void EvalEmitter::cleanup() { S.cleanup(); } EvaluationResult EvalEmitter::interpretExpr(const Expr *E, bool ConvertResultToRValue) { S.setEvalLocation(E->getExprLoc()); - this->ConvertResultToRValue = ConvertResultToRValue; + this->ConvertResultToRValue = ConvertResultToRValue && !isa<ConstantExpr>(E); this->CheckFullyInitialized = isa<ConstantExpr>(E); EvalResult.setSource(E); @@ -56,10 +56,14 @@ EvaluationResult EvalEmitter::interpretDecl(const VarDecl *VD, bool CheckFullyInitialized) { this->CheckFullyInitialized = CheckFullyInitialized; - this->ConvertResultToRValue = - VD->getAnyInitializer() && - (VD->getAnyInitializer()->getType()->isAnyComplexType() || - VD->getAnyInitializer()->getType()->isVectorType()); + + if (const Expr *Init = VD->getAnyInitializer()) { + QualType T = VD->getType(); + this->ConvertResultToRValue = !Init->isGLValue() && !T->isPointerType() && + !T->isObjCObjectPointerType(); + } else + this->ConvertResultToRValue = false; + EvalResult.setSource(VD); if (!this->visitDecl(VD) && EvalResult.empty()) @@ -138,6 +142,10 @@ template <> bool EvalEmitter::emitRet<PT_Ptr>(const SourceInfo &Info) { return true; const Pointer &Ptr = S.Stk.pop<Pointer>(); + + if (CheckFullyInitialized && !EvalResult.checkFullyInitialized(S, Ptr)) + return false; + // Implicitly convert lvalue to rvalue, if requested. if (ConvertResultToRValue) { if (std::optional<APValue> V = Ptr.toRValue(Ctx)) { @@ -146,17 +154,7 @@ template <> bool EvalEmitter::emitRet<PT_Ptr>(const SourceInfo &Info) { return false; } } else { - if (CheckFullyInitialized) { - if (!EvalResult.checkFullyInitialized(S, Ptr)) - return false; - - std::optional<APValue> RValueResult = Ptr.toRValue(Ctx); - if (!RValueResult) - return false; - EvalResult.setValue(*RValueResult); - } else { - EvalResult.setValue(Ptr.toAPValue()); - } + EvalResult.setValue(Ptr.toAPValue()); } return true; diff --git a/clang/lib/AST/Interp/EvaluationResult.cpp b/clang/lib/AST/Interp/EvaluationResult.cpp index 2997723..387e3dc 100644 --- a/clang/lib/AST/Interp/EvaluationResult.cpp +++ b/clang/lib/AST/Interp/EvaluationResult.cpp @@ -151,9 +151,12 @@ bool EvaluationResult::checkFullyInitialized(InterpState &S, if (const Record *R = Ptr.getRecord()) return CheckFieldsInitialized(S, InitLoc, Ptr, R); - const auto *CAT = - cast<ConstantArrayType>(Ptr.getType()->getAsArrayTypeUnsafe()); - return CheckArrayInitialized(S, InitLoc, Ptr, CAT); + + if (const auto *CAT = dyn_cast_if_present<ConstantArrayType>( + Ptr.getType()->getAsArrayTypeUnsafe())) + return CheckArrayInitialized(S, InitLoc, Ptr, CAT); + + return true; } } // namespace interp -- cgit v1.1 From d211abc625cc7bbc8616885bb8eaf4a69a9a3853 Mon Sep 17 00:00:00 2001 From: Piotr Zegar Date: Sun, 9 Jun 2024 09:01:41 +0200 Subject: [clang-tidy] Ignore non-math operators in readability-math-missing-parentheses (#94654) Do not emit warnings for non-math operators. 
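For example, the comma and assignment operators inside a for-init statement no longer trigger the check (a minimal reproducer mirroring the new test cases below):

  void f(int i) {
    int j, k;
    // No warning: ',' and '=' are not math operators.
    for (j = i + 1, k = 0; j < 1; ++j) {}
  }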
Closes #92516 --- .../readability/MathMissingParenthesesCheck.cpp | 3 ++- .../checkers/readability/math-missing-parentheses.cpp | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.cpp b/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.cpp index 65fd296..64ce94e 100644 --- a/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.cpp @@ -57,7 +57,8 @@ static void addParantheses(const BinaryOperator *BinOp, int Precedence1 = getPrecedence(BinOp); int Precedence2 = getPrecedence(ParentBinOp); - if (ParentBinOp != nullptr && Precedence1 != Precedence2) { + if (ParentBinOp != nullptr && Precedence1 != Precedence2 && Precedence1 > 0 && + Precedence2 > 0) { const clang::SourceLocation StartLoc = BinOp->getBeginLoc(); const clang::SourceLocation EndLoc = clang::Lexer::getLocForEndOfToken(BinOp->getEndLoc(), 0, SM, LangOpts); diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/math-missing-parentheses.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/math-missing-parentheses.cpp index a6045c0..4face0b 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/math-missing-parentheses.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/math-missing-parentheses.cpp @@ -140,3 +140,20 @@ void f(){ //CHECK-MESSAGES: :[[@LINE+1]]:13: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] int v = FUN5(0 + 1); } + +namespace PR92516 { + void f(int i) { + int j, k; + for (j = i + 1, k = 0; j < 1; ++j) {} + } + + void f2(int i) { + int j; + for (j = i + 1; j < 1; ++j) {} + } + + void f3(int i) { + int j; + for (j = i + 1, 2; j < 1; ++j) {} + } +} -- cgit v1.1 From 338cbfef03e0ab58d7b52f3301928c58b194a1b4 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Sun, 9 Jun 2024 09:42:24 +0200 Subject: [mlir][Transforms][NFC] Improve dialect conversion documentation (#94736) --- mlir/include/mlir/Transforms/DialectConversion.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h index 83198c9..f6c5149 100644 --- a/mlir/include/mlir/Transforms/DialectConversion.h +++ b/mlir/include/mlir/Transforms/DialectConversion.h @@ -1088,8 +1088,9 @@ struct ConversionConfig { /// An optional listener that is notified about all IR modifications in case /// dialect conversion succeeds. If the dialect conversion fails and no IR - /// modifications are visible (i.e., they were all rolled back), no - /// notifications are sent. + /// modifications are visible (i.e., they were all rolled back), or if the + /// dialect conversion is an "analysis conversion", no notifications are + /// sent (apart from `notifyPatternBegin`/notifyPatternEnd`). /// /// Note: Notifications are sent in a delayed fashion, when the dialect /// conversion is guaranteed to succeed. 
At that point, some IR modifications -- cgit v1.1 From 32b70430c3a62c101e41243f8b1e116820ab6326 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 9 Jun 2024 10:39:43 +0100 Subject: [ARM] vector-store.ll - add big-endian test coverage Based on feedback on #94863 --- llvm/test/CodeGen/ARM/vector-store.ll | 862 ++++++++++++++++++++++------------ 1 file changed, 553 insertions(+), 309 deletions(-) diff --git a/llvm/test/CodeGen/ARM/vector-store.ll b/llvm/test/CodeGen/ARM/vector-store.ll index a8a1031..2f27786 100644 --- a/llvm/test/CodeGen/ARM/vector-store.ll +++ b/llvm/test/CodeGen/ARM/vector-store.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s | FileCheck %s - -target datalayout = "e-m:o-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" -target triple = "thumbv7-none-eabi" +; RUN: llc < %s -mtriple=thumbv7-none-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-LE +; RUN: llc < %s -mtriple=thumbebv7-none-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-BE define void @store_v8i8(ptr %ptr, <8 x i8> %val) { ; CHECK-LABEL: store_v8i8: @@ -11,24 +9,33 @@ define void @store_v8i8(ptr %ptr, <8 x i8> %val) { ; CHECK-NEXT: str r3, [r0, #4] ; CHECK-NEXT: str r2, [r0] ; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <8 x i8> %val, ptr %A, align 1 - ret void + %A = load ptr, ptr %ptr + store <8 x i8> %val, ptr %A, align 1 + ret void } define void @store_v8i8_update(ptr %ptr, <8 x i8> %val) { -; CHECK-LABEL: store_v8i8_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <8 x i8> %val, ptr %A, align 1 - %inc = getelementptr <8 x i8>, ptr %A, i38 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v8i8_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v8i8_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 d16, d16 +; CHECK-BE-NEXT: vst1.8 {d16}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <8 x i8> %val, ptr %A, align 1 + %inc = getelementptr <8 x i8>, ptr %A, i38 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v4i16(ptr %ptr, <4 x i16> %val) { @@ -38,24 +45,33 @@ define void @store_v4i16(ptr %ptr, <4 x i16> %val) { ; CHECK-NEXT: str r3, [r0, #4] ; CHECK-NEXT: str r2, [r0] ; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <4 x i16> %val, ptr %A, align 1 - ret void + %A = load ptr, ptr %ptr + store <4 x i16> %val, ptr %A, align 1 + ret void } define void @store_v4i16_update(ptr %ptr, <4 x i16> %val) { -; CHECK-LABEL: store_v4i16_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <4 x i16> %val, ptr %A, align 1 - %inc = getelementptr <4 x i16>, ptr %A, i34 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v4i16_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16}, [r1]! 
+; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v4i16_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 d16, d16 +; CHECK-BE-NEXT: vst1.8 {d16}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <4 x i16> %val, ptr %A, align 1 + %inc = getelementptr <4 x i16>, ptr %A, i34 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v2i32(ptr %ptr, <2 x i32> %val) { @@ -65,24 +81,33 @@ define void @store_v2i32(ptr %ptr, <2 x i32> %val) { ; CHECK-NEXT: str r3, [r0, #4] ; CHECK-NEXT: str r2, [r0] ; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x i32> %val, ptr %A, align 1 - ret void + %A = load ptr, ptr %ptr + store <2 x i32> %val, ptr %A, align 1 + ret void } define void @store_v2i32_update(ptr %ptr, <2 x i32> %val) { -; CHECK-LABEL: store_v2i32_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x i32> %val, ptr %A, align 1 - %inc = getelementptr <2 x i32>, ptr %A, i32 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v2i32_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v2i32_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 d16, d16 +; CHECK-BE-NEXT: vst1.8 {d16}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <2 x i32> %val, ptr %A, align 1 + %inc = getelementptr <2 x i32>, ptr %A, i32 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v2f32(ptr %ptr, <2 x float> %val) { @@ -92,24 +117,33 @@ define void @store_v2f32(ptr %ptr, <2 x float> %val) { ; CHECK-NEXT: str r3, [r0, #4] ; CHECK-NEXT: str r2, [r0] ; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x float> %val, ptr %A, align 1 - ret void + %A = load ptr, ptr %ptr + store <2 x float> %val, ptr %A, align 1 + ret void } define void @store_v2f32_update(ptr %ptr, <2 x float> %val) { -; CHECK-LABEL: store_v2f32_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x float> %val, ptr %A, align 1 - %inc = getelementptr <2 x float>, ptr %A, i32 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v2f32_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v2f32_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 d16, d16 +; CHECK-BE-NEXT: vst1.8 {d16}, [r1]! 
+; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <2 x float> %val, ptr %A, align 1 + %inc = getelementptr <2 x float>, ptr %A, i32 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v1i64(ptr %ptr, <1 x i64> %val) { @@ -119,279 +153,458 @@ define void @store_v1i64(ptr %ptr, <1 x i64> %val) { ; CHECK-NEXT: str r3, [r0, #4] ; CHECK-NEXT: str r2, [r0] ; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <1 x i64> %val, ptr %A, align 1 - ret void + %A = load ptr, ptr %ptr + store <1 x i64> %val, ptr %A, align 1 + ret void } define void @store_v1i64_update(ptr %ptr, <1 x i64> %val) { -; CHECK-LABEL: store_v1i64_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <1 x i64> %val, ptr %A, align 1 - %inc = getelementptr <1 x i64>, ptr %A, i31 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v1i64_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v1i64_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 d16, d16 +; CHECK-BE-NEXT: vst1.8 {d16}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <1 x i64> %val, ptr %A, align 1 + %inc = getelementptr <1 x i64>, ptr %A, i31 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v16i8(ptr %ptr, <16 x i8> %val) { -; CHECK-LABEL: store_v16i8: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: ldr r0, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16, d17}, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <16 x i8> %val, ptr %A, align 1 - ret void +; CHECK-LE-LABEL: store_v16i8: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: ldr r0, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v16i8: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <16 x i8> %val, ptr %A, align 1 + ret void } define void @store_v16i8_update(ptr %ptr, <16 x i8> %val) { -; CHECK-LABEL: store_v16i8_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vst1.8 {d16, d17}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <16 x i8> %val, ptr %A, align 1 - %inc = getelementptr <16 x i8>, ptr %A, i316 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v16i8_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v16i8_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r1]! 
+; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <16 x i8> %val, ptr %A, align 1 + %inc = getelementptr <16 x i8>, ptr %A, i316 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v8i16(ptr %ptr, <8 x i16> %val) { -; CHECK-LABEL: store_v8i16: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: ldr r0, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16, d17}, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <8 x i16> %val, ptr %A, align 1 - ret void +; CHECK-LE-LABEL: store_v8i16: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: ldr r0, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v8i16: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <8 x i16> %val, ptr %A, align 1 + ret void } define void @store_v8i16_update(ptr %ptr, <8 x i16> %val) { -; CHECK-LABEL: store_v8i16_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vst1.8 {d16, d17}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <8 x i16> %val, ptr %A, align 1 - %inc = getelementptr <8 x i16>, ptr %A, i38 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v8i16_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v8i16_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <8 x i16> %val, ptr %A, align 1 + %inc = getelementptr <8 x i16>, ptr %A, i38 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v4i32(ptr %ptr, <4 x i32> %val) { -; CHECK-LABEL: store_v4i32: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: ldr r0, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16, d17}, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <4 x i32> %val, ptr %A, align 1 - ret void +; CHECK-LE-LABEL: store_v4i32: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: ldr r0, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v4i32: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <4 x i32> %val, ptr %A, align 1 + ret void } define void @store_v4i32_update(ptr %ptr, <4 x i32> %val) { -; CHECK-LABEL: store_v4i32_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vst1.8 {d16, d17}, [r1]! 
-; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <4 x i32> %val, ptr %A, align 1 - %inc = getelementptr <4 x i32>, ptr %A, i34 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v4i32_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v4i32_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <4 x i32> %val, ptr %A, align 1 + %inc = getelementptr <4 x i32>, ptr %A, i34 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v4f32(ptr %ptr, <4 x float> %val) { -; CHECK-LABEL: store_v4f32: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: ldr r0, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16, d17}, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <4 x float> %val, ptr %A, align 1 - ret void +; CHECK-LE-LABEL: store_v4f32: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: ldr r0, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v4f32: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <4 x float> %val, ptr %A, align 1 + ret void } define void @store_v4f32_update(ptr %ptr, <4 x float> %val) { -; CHECK-LABEL: store_v4f32_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vst1.8 {d16, d17}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <4 x float> %val, ptr %A, align 1 - %inc = getelementptr <4 x float>, ptr %A, i34 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v4f32_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v4f32_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r1]! 
+; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <4 x float> %val, ptr %A, align 1 + %inc = getelementptr <4 x float>, ptr %A, i34 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v2i64(ptr %ptr, <2 x i64> %val) { -; CHECK-LABEL: store_v2i64: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: ldr r0, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16, d17}, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x i64> %val, ptr %A, align 1 - ret void +; CHECK-LE-LABEL: store_v2i64: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: ldr r0, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v2i64: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <2 x i64> %val, ptr %A, align 1 + ret void } define void @store_v2i64_update(ptr %ptr, <2 x i64> %val) { -; CHECK-LABEL: store_v2i64_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vst1.8 {d16, d17}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x i64> %val, ptr %A, align 1 - %inc = getelementptr <2 x i64>, ptr %A, i32 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v2i64_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v2i64_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <2 x i64> %val, ptr %A, align 1 + %inc = getelementptr <2 x i64>, ptr %A, i32 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v2i64_update_aligned2(ptr %ptr, <2 x i64> %val) { -; CHECK-LABEL: store_v2i64_update_aligned2: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vst1.16 {d16, d17}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x i64> %val, ptr %A, align 2 - %inc = getelementptr <2 x i64>, ptr %A, i32 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v2i64_update_aligned2: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vst1.16 {d16, d17}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v2i64_update_aligned2: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.16 q8, q8 +; CHECK-BE-NEXT: vst1.16 {d16, d17}, [r1]! 
+; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <2 x i64> %val, ptr %A, align 2 + %inc = getelementptr <2 x i64>, ptr %A, i32 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v2i64_update_aligned4(ptr %ptr, <2 x i64> %val) { -; CHECK-LABEL: store_v2i64_update_aligned4: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vst1.32 {d16, d17}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x i64> %val, ptr %A, align 4 - %inc = getelementptr <2 x i64>, ptr %A, i32 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v2i64_update_aligned4: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vst1.32 {d16, d17}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v2i64_update_aligned4: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.32 q8, q8 +; CHECK-BE-NEXT: vst1.32 {d16, d17}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <2 x i64> %val, ptr %A, align 4 + %inc = getelementptr <2 x i64>, ptr %A, i32 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v2i64_update_aligned8(ptr %ptr, <2 x i64> %val) { -; CHECK-LABEL: store_v2i64_update_aligned8: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vst1.64 {d16, d17}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x i64> %val, ptr %A, align 8 - %inc = getelementptr <2 x i64>, ptr %A, i32 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v2i64_update_aligned8: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vst1.64 {d16, d17}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v2i64_update_aligned8: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vst1.64 {d16, d17}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <2 x i64> %val, ptr %A, align 8 + %inc = getelementptr <2 x i64>, ptr %A, i32 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v2i64_update_aligned16(ptr %ptr, <2 x i64> %val) { -; CHECK-LABEL: store_v2i64_update_aligned16: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vst1.64 {d16, d17}, [r1:128]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x i64> %val, ptr %A, align 16 - %inc = getelementptr <2 x i64>, ptr %A, i32 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v2i64_update_aligned16: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vst1.64 {d16, d17}, [r1:128]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v2i64_update_aligned16: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vst1.64 {d16, d17}, [r1:128]! 
+; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <2 x i64> %val, ptr %A, align 16 + %inc = getelementptr <2 x i64>, ptr %A, i32 1 + store ptr %inc, ptr %ptr + ret void } define void @truncstore_v4i32tov4i8(ptr %ptr, <4 x i32> %val) { -; CHECK-LABEL: truncstore_v4i32tov4i8: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r0, [r0] -; CHECK-NEXT: vmovn.i32 d16, q8 -; CHECK-NEXT: vuzp.8 d16, d17 -; CHECK-NEXT: vst1.32 {d16[0]}, [r0:32] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - %trunc = trunc <4 x i32> %val to <4 x i8> - store <4 x i8> %trunc, ptr %A, align 4 - ret void +; CHECK-LE-LABEL: truncstore_v4i32tov4i8: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r0, [r0] +; CHECK-LE-NEXT: vmovn.i32 d16, q8 +; CHECK-LE-NEXT: vuzp.8 d16, d17 +; CHECK-LE-NEXT: vst1.32 {d16[0]}, [r0:32] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: truncstore_v4i32tov4i8: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: vrev64.32 q8, q8 +; CHECK-BE-NEXT: vmovn.i32 d16, q8 +; CHECK-BE-NEXT: vrev16.8 d16, d16 +; CHECK-BE-NEXT: vuzp.8 d16, d17 +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: vrev32.8 d16, d17 +; CHECK-BE-NEXT: vst1.32 {d16[0]}, [r0:32] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + %trunc = trunc <4 x i32> %val to <4 x i8> + store <4 x i8> %trunc, ptr %A, align 4 + ret void } define void @truncstore_v4i32tov4i8_fake_update(ptr %ptr, <4 x i32> %val) { -; CHECK-LABEL: truncstore_v4i32tov4i8_fake_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: movs r2, #16 -; CHECK-NEXT: vmovn.i32 d16, q8 -; CHECK-NEXT: vuzp.8 d16, d17 -; CHECK-NEXT: vst1.32 {d16[0]}, [r1:32], r2 -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - %trunc = trunc <4 x i32> %val to <4 x i8> - store <4 x i8> %trunc, ptr %A, align 4 - %inc = getelementptr <4 x i8>, ptr %A, i38 4 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: truncstore_v4i32tov4i8_fake_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: movs r2, #16 +; CHECK-LE-NEXT: vmovn.i32 d16, q8 +; CHECK-LE-NEXT: vuzp.8 d16, d17 +; CHECK-LE-NEXT: vst1.32 {d16[0]}, [r1:32], r2 +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: truncstore_v4i32tov4i8_fake_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: movs r2, #16 +; CHECK-BE-NEXT: vrev64.32 q8, q8 +; CHECK-BE-NEXT: vmovn.i32 d16, q8 +; CHECK-BE-NEXT: vrev16.8 d16, d16 +; CHECK-BE-NEXT: vuzp.8 d16, d17 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev32.8 d16, d17 +; CHECK-BE-NEXT: vst1.32 {d16[0]}, [r1:32], r2 +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + %trunc = trunc <4 x i32> %val to <4 x i8> + store <4 x i8> %trunc, ptr %A, align 4 + %inc = getelementptr <4 x i8>, ptr %A, i38 4 + store ptr %inc, ptr %ptr + ret void } define ptr @test_vst1_1reg(ptr %ptr.in, ptr %ptr.out) { -; CHECK-LABEL: test_vst1_1reg: -; CHECK: @ %bb.0: -; CHECK-NEXT: vld1.64 {d16, d17}, [r0] -; CHECK-NEXT: movs r0, #32 -; CHECK-NEXT: vst1.32 {d16, d17}, [r1], r0 -; CHECK-NEXT: mov r0, r1 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: test_vst1_1reg: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vld1.64 {d16, 
d17}, [r0] +; CHECK-LE-NEXT: movs r0, #32 +; CHECK-LE-NEXT: vst1.32 {d16, d17}, [r1], r0 +; CHECK-LE-NEXT: mov r0, r1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: test_vst1_1reg: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vld1.64 {d16, d17}, [r0] +; CHECK-BE-NEXT: movs r0, #32 +; CHECK-BE-NEXT: vrev64.32 q8, q8 +; CHECK-BE-NEXT: vst1.32 {d16, d17}, [r1], r0 +; CHECK-BE-NEXT: mov r0, r1 +; CHECK-BE-NEXT: bx lr %val = load <4 x i32>, ptr %ptr.in store <4 x i32> %val, ptr %ptr.out %next = getelementptr <4 x i32>, ptr %ptr.out, i32 2 @@ -400,37 +613,68 @@ define ptr @test_vst1_1reg(ptr %ptr.in, ptr %ptr.out) { ; PR56970 define void @v3i8store(ptr %p) { -; CHECK-LABEL: v3i8store: -; CHECK: @ %bb.0: -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: vmov.i32 d16, #0xff -; CHECK-NEXT: mov r1, sp -; CHECK-NEXT: vmov.i32 d17, #0x0 -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: vand d16, d17, d16 -; CHECK-NEXT: vst1.32 {d16[0]}, [r1:32] -; CHECK-NEXT: vld1.32 {d16[0]}, [r1:32] -; CHECK-NEXT: vmovl.u16 q8, d16 -; CHECK-NEXT: strb r2, [r0, #2] -; CHECK-NEXT: vmov.32 r1, d16[0] -; CHECK-NEXT: strh r1, [r0] -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: v3i8store: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: .pad #4 +; CHECK-LE-NEXT: sub sp, #4 +; CHECK-LE-NEXT: vmov.i32 d16, #0xff +; CHECK-LE-NEXT: mov r1, sp +; CHECK-LE-NEXT: vmov.i32 d17, #0x0 +; CHECK-LE-NEXT: movs r2, #0 +; CHECK-LE-NEXT: vand d16, d17, d16 +; CHECK-LE-NEXT: vst1.32 {d16[0]}, [r1:32] +; CHECK-LE-NEXT: vld1.32 {d16[0]}, [r1:32] +; CHECK-LE-NEXT: vmovl.u16 q8, d16 +; CHECK-LE-NEXT: strb r2, [r0, #2] +; CHECK-LE-NEXT: vmov.32 r1, d16[0] +; CHECK-LE-NEXT: strh r1, [r0] +; CHECK-LE-NEXT: add sp, #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: v3i8store: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: .pad #4 +; CHECK-BE-NEXT: sub sp, #4 +; CHECK-BE-NEXT: movs r1, #0 +; CHECK-BE-NEXT: mov r2, sp +; CHECK-BE-NEXT: str r1, [sp] +; CHECK-BE-NEXT: vld1.32 {d16[0]}, [r2:32] +; CHECK-BE-NEXT: strb r1, [r0, #2] +; CHECK-BE-NEXT: vrev32.16 d16, d16 +; CHECK-BE-NEXT: vmovl.u16 q8, d16 +; CHECK-BE-NEXT: vmov.32 r2, d16[0] +; CHECK-BE-NEXT: strh r2, [r0] +; CHECK-BE-NEXT: add sp, #4 +; CHECK-BE-NEXT: bx lr store <3 x i8> zeroinitializer, ptr %p, align 4 ret void } define void @v3i64shuffle(ptr %p, <3 x i64> %a) { -; CHECK-LABEL: v3i64shuffle: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.i32 q8, #0x0 -; CHECK-NEXT: ldrd r12, r1, [sp, #8] -; CHECK-NEXT: vmov d18, r2, r3 -; CHECK-NEXT: vorr d19, d16, d16 -; CHECK-NEXT: str r1, [r0, #20] -; CHECK-NEXT: vst1.32 {d18, d19}, [r0]! -; CHECK-NEXT: str.w r12, [r0] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: v3i64shuffle: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vmov.i32 q8, #0x0 +; CHECK-LE-NEXT: ldrd r12, r1, [sp, #8] +; CHECK-LE-NEXT: vmov d18, r2, r3 +; CHECK-LE-NEXT: vorr d19, d16, d16 +; CHECK-LE-NEXT: str r1, [r0, #20] +; CHECK-LE-NEXT: vst1.32 {d18, d19}, [r0]! +; CHECK-LE-NEXT: str.w r12, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: v3i64shuffle: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d16, [sp, #8] +; CHECK-BE-NEXT: vmov.i32 q9, #0x0 +; CHECK-BE-NEXT: vrev64.32 q8, q8 +; CHECK-BE-NEXT: vmov r12, r1, d16 +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: vorr d17, d18, d18 +; CHECK-BE-NEXT: vrev64.32 q8, q8 +; CHECK-BE-NEXT: str r1, [r0, #20] +; CHECK-BE-NEXT: vst1.32 {d16, d17}, [r0]! 
+; CHECK-BE-NEXT: str.w r12, [r0] +; CHECK-BE-NEXT: bx lr %b = shufflevector <3 x i64> %a, <3 x i64> zeroinitializer, <3 x i32> store <3 x i64> %b, ptr %p, align 4 ret void -- cgit v1.1 From e329bfcb033910fc340b6da5a6307003ac6b2b01 Mon Sep 17 00:00:00 2001 From: Piotr Zegar Date: Sun, 9 Jun 2024 11:52:46 +0200 Subject: [clang-tidy] Ignore implicit functions in readability-implicit-bool-conversion (#94512) Ignore implicit declarations and defaulted functions. Helps with issues in generated code like the C++ spaceship operator. Closes #93409 --- .../readability/ImplicitBoolConversionCheck.cpp | 7 +++-- clang-tools-extra/docs/ReleaseNotes.rst | 3 ++- .../readability/implicit-bool-conversion-cxx20.cpp | 31 ++++++++++++++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion-cxx20.cpp diff --git a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp index 28f5ead..aa115cd 100644 --- a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp @@ -279,6 +279,9 @@ void ImplicitBoolConversionCheck::registerMatchers(MatchFinder *Finder) { hasParent(callExpr()), hasSourceExpression(binaryOperator(hasAnyOperatorName("==", "!=")))); + auto IsInCompilerGeneratedFunction = hasAncestor(namedDecl(anyOf( + isImplicit(), functionDecl(isDefaulted()), functionTemplateDecl()))); + Finder->addMatcher( traverse(TK_AsIs, implicitCastExpr( @@ -299,7 +302,7 @@ void ImplicitBoolConversionCheck::registerMatchers(MatchFinder *Finder) { // additional parens in replacement. optionally(hasParent(stmt().bind("parentStmt"))), unless(isInTemplateInstantiation()), - unless(hasAncestor(functionTemplateDecl()))) + unless(IsInCompilerGeneratedFunction)) .bind("implicitCastToBool")), this); @@ -331,7 +334,7 @@ void ImplicitBoolConversionCheck::registerMatchers(MatchFinder *Finder) { anyOf(hasParent(implicitCastExpr().bind("furtherImplicitCast")), anything()), unless(isInTemplateInstantiation()), - unless(hasAncestor(functionTemplateDecl())))), + unless(IsInCompilerGeneratedFunction))), this); } diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 277a6e7..0b03e37 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -408,7 +408,8 @@ Changes in existing checks valid fix suggestions for ``static_cast`` without a preceding space and fixed problem with duplicate parentheses in double implicit casts. Corrected the fix suggestions for C23 and later by using C-style casts instead of - ``static_cast``. + ``static_cast``. Fixed false positives in C++20 spaceship operator by ignoring + casts in implicit and defaulted functions. 
- Improved :doc:`readability-redundant-inline-specifier ` check to properly diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion-cxx20.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion-cxx20.cpp new file mode 100644 index 0000000..13aa5c5 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion-cxx20.cpp @@ -0,0 +1,31 @@ +// RUN: %check_clang_tidy -std=c++20 %s readability-implicit-bool-conversion %t + +namespace std { +struct strong_ordering { + int n; + constexpr operator int() const { return n; } + static const strong_ordering equal, greater, less; +}; +constexpr strong_ordering strong_ordering::equal = {0}; +constexpr strong_ordering strong_ordering::greater = {1}; +constexpr strong_ordering strong_ordering::less = {-1}; +} // namespace std + +namespace PR93409 { + struct X + { + auto operator<=>(const X&) const = default; + bool m_b; + }; + + struct Y + { + auto operator<=>(const Y&) const = default; + X m_x; + }; + + bool compare(const Y& y1, const Y& y2) + { + return y1 == y2 || y1 < y2 || y1 > y2; + } +} -- cgit v1.1 From 31b84d459cea6bde7f8cb232e70ffb0cf8e5d1ed Mon Sep 17 00:00:00 2001 From: Piotr Zegar Date: Sun, 9 Jun 2024 11:53:59 +0200 Subject: [clang-tidy] Extend modernize-use-designated-initializers with new options (#94651) Add StrictCStandardCompliance and StrictCppStandardCompliance options that default to true. Closes #83732 --- .../modernize/UseDesignatedInitializersCheck.cpp | 18 +++++++++++++++++- .../modernize/UseDesignatedInitializersCheck.h | 9 +++++++++ .../checks/modernize/use-designated-initializers.rst | 12 +++++++++++- .../checkers/modernize/use-designated-initializers.cpp | 8 ++++---- 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp index ebc5338..2a0cc40 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp @@ -32,6 +32,14 @@ static constexpr bool RestrictToPODTypesDefault = false; static constexpr char IgnoreMacrosName[] = "IgnoreMacros"; static constexpr bool IgnoreMacrosDefault = true; +static constexpr char StrictCStandardComplianceName[] = + "StrictCStandardCompliance"; +static constexpr bool StrictCStandardComplianceDefault = true; + +static constexpr char StrictCppStandardComplianceName[] = + "StrictCppStandardCompliance"; +static constexpr bool StrictCppStandardComplianceDefault = true; + namespace { struct Designators { @@ -97,7 +105,12 @@ UseDesignatedInitializersCheck::UseDesignatedInitializersCheck( RestrictToPODTypes( Options.get(RestrictToPODTypesName, RestrictToPODTypesDefault)), IgnoreMacros( - Options.getLocalOrGlobal(IgnoreMacrosName, IgnoreMacrosDefault)) {} + Options.getLocalOrGlobal(IgnoreMacrosName, IgnoreMacrosDefault)), + StrictCStandardCompliance(Options.get(StrictCStandardComplianceName, + StrictCStandardComplianceDefault)), + StrictCppStandardCompliance( + Options.get(StrictCppStandardComplianceName, + StrictCppStandardComplianceDefault)) {} void UseDesignatedInitializersCheck::registerMatchers(MatchFinder *Finder) { const auto HasBaseWithFields = @@ -179,6 +192,9 @@ void UseDesignatedInitializersCheck::storeOptions( IgnoreSingleElementAggregates); Options.store(Opts, RestrictToPODTypesName, RestrictToPODTypes); Options.store(Opts, 
IgnoreMacrosName, IgnoreMacros); + Options.store(Opts, StrictCStandardComplianceName, StrictCStandardCompliance); + Options.store(Opts, StrictCppStandardComplianceName, + StrictCppStandardCompliance); } } // namespace clang::tidy::modernize diff --git a/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.h b/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.h index 0a496f5..79095ad 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.h @@ -29,10 +29,19 @@ public: return TK_IgnoreUnlessSpelledInSource; } + bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { + return LangOpts.CPlusPlus20 || LangOpts.C99 || + (LangOpts.CPlusPlus && !StrictCppStandardCompliance) || + (!LangOpts.CPlusPlus && !LangOpts.ObjC && + !StrictCStandardCompliance); + } + private: bool IgnoreSingleElementAggregates; bool RestrictToPODTypes; bool IgnoreMacros; + bool StrictCStandardCompliance; + bool StrictCppStandardCompliance; }; } // namespace clang::tidy::modernize diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-designated-initializers.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-designated-initializers.rst index 22f5098..f101cfc 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-designated-initializers.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-designated-initializers.rst @@ -37,7 +37,7 @@ declaration of ``S``. Even when compiling in a language version older than C++20, depending on your compiler, designated initializers are potentially supported. Therefore, the -check is not restricted to C++20 and newer versions. Check out the options +check is by default restricted to C99/C++20 and above. Check out the options ``-Wc99-designator`` to get support for mixed designators in initializer list in C and ``-Wc++20-designator`` for support of designated initializers in older C++ language modes. @@ -60,3 +60,13 @@ Options The value `true` specifies that only Plain Old Data (POD) types shall be checked. This makes the check applicable to even older C++ standards. The default value is `false`. + +.. option:: StrictCStandardCompliance + + When set to `false`, the check will not restrict itself to C99 and above. + The default value is `true`. + +.. option:: StrictCppStandardCompliance + + When set to `false`, the check will not restrict itself to C++20 and above. + The default value is `true`. 
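For a concrete picture (a minimal sketch, not part of the patch; the option names follow the docs above, but the suggested rewrite wording is paraphrased), the relaxed C++ mode could be exercised like this:

// RUN: %check_clang_tidy -std=c++17 %s modernize-use-designated-initializers %t \
// RUN:   -- -config="{CheckOptions: {modernize-use-designated-initializers.StrictCppStandardCompliance: false}}" \
// RUN:   -- -fno-delayed-template-parsing

struct Pair { int x; int y; };

Pair p{1, 2};
// With StrictCppStandardCompliance set to false, the check no longer bails
// out in pre-C++20 modes and suggests: Pair p{.x = 1, .y = 2};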
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-designated-initializers.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-designated-initializers.cpp index 7e5c26e3..9b769ad 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-designated-initializers.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-designated-initializers.cpp @@ -1,13 +1,13 @@ -// RUN: %check_clang_tidy -std=c++17 %s modernize-use-designated-initializers %t \ +// RUN: %check_clang_tidy -std=c++20 %s modernize-use-designated-initializers %t \ // RUN: -- \ // RUN: -- -fno-delayed-template-parsing -// RUN: %check_clang_tidy -check-suffixes=,SINGLE-ELEMENT -std=c++17 %s modernize-use-designated-initializers %t \ +// RUN: %check_clang_tidy -check-suffixes=,SINGLE-ELEMENT -std=c++20 %s modernize-use-designated-initializers %t \ // RUN: -- -config="{CheckOptions: {modernize-use-designated-initializers.IgnoreSingleElementAggregates: false}}" \ // RUN: -- -fno-delayed-template-parsing -// RUN: %check_clang_tidy -check-suffixes=POD -std=c++17 %s modernize-use-designated-initializers %t \ +// RUN: %check_clang_tidy -check-suffixes=POD -std=c++20 %s modernize-use-designated-initializers %t \ // RUN: -- -config="{CheckOptions: {modernize-use-designated-initializers.RestrictToPODTypes: true}}" \ // RUN: -- -fno-delayed-template-parsing -// RUN: %check_clang_tidy -check-suffixes=,MACROS -std=c++17 %s modernize-use-designated-initializers %t \ +// RUN: %check_clang_tidy -check-suffixes=,MACROS -std=c++20 %s modernize-use-designated-initializers %t \ // RUN: -- -config="{CheckOptions: {modernize-use-designated-initializers.IgnoreMacros: false}}" \ // RUN: -- -fno-delayed-template-parsing -- cgit v1.1 From b55fb567e49214b47b3f2e1c9b8f39a64d620ded Mon Sep 17 00:00:00 2001 From: Piotr Zegar Date: Sun, 9 Jun 2024 11:55:08 +0200 Subject: [clang-tidy] Improve bugprone-multi-level-implicit-pointer-conversion (#94524) Ignore implicit pointer conversions that are part of a cast expression Closes #93959 --- .../bugprone/MultiLevelImplicitPointerConversionCheck.cpp | 11 ++++++++++- clang-tools-extra/docs/ReleaseNotes.rst | 4 ++++ .../bugprone/multi-level-implicit-pointer-conversion.cpp | 12 ++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/bugprone/MultiLevelImplicitPointerConversionCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/MultiLevelImplicitPointerConversionCheck.cpp index 4dd3cb5..7a989b0 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MultiLevelImplicitPointerConversionCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/MultiLevelImplicitPointerConversionCheck.cpp @@ -48,12 +48,21 @@ AST_MATCHER(ImplicitCastExpr, isMultiLevelPointerConversion) { return SourcePtrLevel != TargetPtrLevel; } +AST_MATCHER(QualType, isPointerType) { + const QualType Type = + Node.getCanonicalType().getNonReferenceType().getUnqualifiedType(); + + return !Type.isNull() && Type->isPointerType(); +} + } // namespace void MultiLevelImplicitPointerConversionCheck::registerMatchers( MatchFinder *Finder) { Finder->addMatcher( - implicitCastExpr(hasCastKind(CK_BitCast), isMultiLevelPointerConversion()) + implicitCastExpr(hasCastKind(CK_BitCast), isMultiLevelPointerConversion(), + unless(hasParent(explicitCastExpr( + hasDestinationType(isPointerType()))))) .bind("expr"), this); } diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 0b03e37..50edc10 100644 --- 
a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -224,6 +224,10 @@ Changes in existing checks check by ignoring ``__func__`` macro in lambda captures, initializers of default parameters and nested function declarations. +- Improved :doc:`bugprone-multi-level-implicit-pointer-conversion + ` check + by ignoring implicit pointer conversions that are part of a cast expression. + - Improved :doc:`bugprone-non-zero-enum-to-bool-conversion ` check by eliminating false positives resulting from direct usage of bitwise operators diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/multi-level-implicit-pointer-conversion.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/multi-level-implicit-pointer-conversion.cpp index 7a56242..6868f9e 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/multi-level-implicit-pointer-conversion.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/multi-level-implicit-pointer-conversion.cpp @@ -63,3 +63,15 @@ void test() takeSecondLevelVoidPtr(getSecondLevelVoidPtr()); } + +namespace PR93959 { + void free(void*); + + void test() { + char **p = nullptr; + free(p); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: multilevel pointer conversion from 'char **' to 'void *', please use explicit cast [bugprone-multi-level-implicit-pointer-conversion] + free((void *)p); + free(static_cast(p)); + } +} -- cgit v1.1 From 46d94bd0ad0c22a686ea71f6e7d0494f74c22f1a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 9 Jun 2024 11:06:26 +0100 Subject: [X86] Trim trailing whitespace to reduce diff in #94845 --- llvm/lib/Target/X86/X86InstrCompiler.td | 14 +++++++------- llvm/lib/Target/X86/X86InstrMisc.td | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index cf5a32f..6fb6e16 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1572,21 +1572,21 @@ let Predicates = [HasNDD] in { } // Depositing value to 8/16 bit subreg: -def : Pat<(or (and GR64:$dst, -256), +def : Pat<(or (and GR64:$dst, -256), (i64 (zextloadi8 addr:$src))), - (INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>; + (INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>; -def : Pat<(or (and GR32:$dst, -256), +def : Pat<(or (and GR32:$dst, -256), (i32 (zextloadi8 addr:$src))), - (INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>; + (INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>; -def : Pat<(or (and GR64:$dst, -65536), +def : Pat<(or (and GR64:$dst, -65536), (i64 (zextloadi16 addr:$src))), (INSERT_SUBREG (i64 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>; -def : Pat<(or (and GR32:$dst, -65536), +def : Pat<(or (and GR32:$dst, -65536), (i32 (zextloadi16 addr:$src))), - (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>; + (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>; // To avoid needing to materialize an immediate in a register, use a 32-bit and // with implicit zero-extension instead of a 64-bit and if the immediate has at diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td index 496a7e6..c4da0e5 100644 --- a/llvm/lib/Target/X86/X86InstrMisc.td +++ b/llvm/lib/Target/X86/X86InstrMisc.td @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // This file defining the misc X86 instructions. 
-// +// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -- cgit v1.1
From 53fecef1ec5cb502900f82b07036b28cb0fb9f0f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 9 Jun 2024 11:30:05 +0100 Subject: [DAG] FoldConstantArithmetic - allow binop folding to work with differing bitcasted constants (#94863) We currently only constant fold binop(bitcast(c1),bitcast(c2)) if c1 and c2 are both bitcasted and from the same type. This patch relaxes this assumption to allow the constant build vector to originate from different types (and allow cases where only one operand was bitcasted). We still ensure we bitcast back to one of the original types if both operands were bitcasted (we assume that if we have a non-bitcasted constant then it's legal to keep using that type).
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 25 ++++++++++++++++--------- llvm/test/CodeGen/ARM/vector-store.ll | 17 +++++++---------- llvm/test/CodeGen/X86/vshift-6.ll | 4 ++-- 3 files changed, 25 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index ddb8a0e..523d3ae 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6551,17 +6551,17 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, ElementCount NumElts = VT.getVectorElementCount(); - // See if we can fold through bitcasted integer ops. + // See if we can fold through any bitcasted integer ops. if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() && Ops[0].getValueType() == VT && Ops[1].getValueType() == VT && - Ops[0].getOpcode() == ISD::BITCAST && - Ops[1].getOpcode() == ISD::BITCAST) { + (Ops[0].getOpcode() == ISD::BITCAST || + Ops[1].getOpcode() == ISD::BITCAST)) { SDValue N1 = peekThroughBitcasts(Ops[0]); SDValue N2 = peekThroughBitcasts(Ops[1]); auto *BV1 = dyn_cast(N1); auto *BV2 = dyn_cast(N2); - EVT BVVT = N1.getValueType(); - if (BV1 && BV2 && BVVT.isInteger() && BVVT == N2.getValueType()) { + if (BV1 && BV2 && N1.getValueType().isInteger() && + N2.getValueType().isInteger()) { bool IsLE = getDataLayout().isLittleEndian(); unsigned EltBits = VT.getScalarSizeInBits(); SmallVector RawBits1, RawBits2;
@@ -6577,15 +6577,22 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, RawBits.push_back(*Fold); } if (RawBits.size() == NumElts.getFixedValue()) { - // We have constant folded, but we need to cast this again back to - // the original (possibly legalized) type. + // We have constant folded, but we might need to cast this again back + // to the original (possibly legalized) type.
+ EVT BVVT, BVEltVT; + if (N1.getValueType() == VT) { + BVVT = N1.getValueType(); + BVEltVT = BV1->getOperand(0).getValueType(); + } else { + BVVT = N2.getValueType(); + BVEltVT = BV2->getOperand(0).getValueType(); + } + unsigned BVEltBits = BVEltVT.getSizeInBits(); SmallVector DstBits; BitVector DstUndefs; BuildVectorSDNode::recastRawBits(IsLE, BVVT.getScalarSizeInBits(), DstBits, RawBits, DstUndefs, BitVector(RawBits.size(), false)); - EVT BVEltVT = BV1->getOperand(0).getValueType(); - unsigned BVEltBits = BVEltVT.getSizeInBits(); SmallVector Ops(DstBits.size(), getUNDEF(BVEltVT)); for (unsigned I = 0, E = DstBits.size(); I != E; ++I) { if (DstUndefs[I])
diff --git a/llvm/test/CodeGen/ARM/vector-store.ll b/llvm/test/CodeGen/ARM/vector-store.ll index 2f27786..a0a801d 100644 --- a/llvm/test/CodeGen/ARM/vector-store.ll +++ b/llvm/test/CodeGen/ARM/vector-store.ll
@@ -617,17 +617,14 @@ define void @v3i8store(ptr %p) { ; CHECK-LE: @ %bb.0: ; CHECK-LE-NEXT: .pad #4 ; CHECK-LE-NEXT: sub sp, #4 -; CHECK-LE-NEXT: vmov.i32 d16, #0xff -; CHECK-LE-NEXT: mov r1, sp -; CHECK-LE-NEXT: vmov.i32 d17, #0x0 -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vand d16, d17, d16 -; CHECK-LE-NEXT: vst1.32 {d16[0]}, [r1:32] -; CHECK-LE-NEXT: vld1.32 {d16[0]}, [r1:32] +; CHECK-LE-NEXT: movs r1, #0 +; CHECK-LE-NEXT: mov r2, sp +; CHECK-LE-NEXT: str r1, [sp] +; CHECK-LE-NEXT: vld1.32 {d16[0]}, [r2:32] +; CHECK-LE-NEXT: strb r1, [r0, #2] ; CHECK-LE-NEXT: vmovl.u16 q8, d16 -; CHECK-LE-NEXT: strb r2, [r0, #2] -; CHECK-LE-NEXT: vmov.32 r1, d16[0] -; CHECK-LE-NEXT: strh r1, [r0] +; CHECK-LE-NEXT: vmov.32 r2, d16[0] +; CHECK-LE-NEXT: strh r2, [r0] ; CHECK-LE-NEXT: add sp, #4 ; CHECK-LE-NEXT: bx lr ;
diff --git a/llvm/test/CodeGen/X86/vshift-6.ll b/llvm/test/CodeGen/X86/vshift-6.ll index 21c8e8d..912ff75 100644 --- a/llvm/test/CodeGen/X86/vshift-6.ll +++ b/llvm/test/CodeGen/X86/vshift-6.ll
@@ -32,9 +32,9 @@ define <16 x i8> @do_not_crash(ptr, ptr, ptr, i32, i64, i8) { ; X86-NEXT: movb %al, (%ecx) ; X86-NEXT: movd %eax, %xmm1 ; X86-NEXT: psllq $56, %xmm1 -; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 ; X86-NEXT: pcmpeqd %xmm3, %xmm3 ; X86-NEXT: psllw $5, %xmm1 +; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 ; X86-NEXT: pxor %xmm2, %xmm2 ; X86-NEXT: pxor %xmm0, %xmm0 ; X86-NEXT: pcmpgtb %xmm1, %xmm0
@@ -64,9 +64,9 @@ define <16 x i8> @do_not_crash(ptr, ptr, ptr, i32, i64, i8) { ; X64-NEXT: movb %r9b, (%rdi) ; X64-NEXT: movd %r9d, %xmm1 ; X64-NEXT: psllq $56, %xmm1 -; X64-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; X64-NEXT: pcmpeqd %xmm2, %xmm2 ; X64-NEXT: psllw $5, %xmm1 +; X64-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; X64-NEXT: pxor %xmm3, %xmm3 ; X64-NEXT: pxor %xmm0, %xmm0 ; X64-NEXT: pcmpgtb %xmm1, %xmm0 -- cgit v1.1
From a284bdb31146160352da905a888da738f2661b50 Mon Sep 17 00:00:00 2001 From: David Green Date: Sun, 9 Jun 2024 12:28:20 +0100 Subject: [DAG] Fold fdiv X, c2 -> fmul X, 1/c2 without AllowReciprocal if exact (#93882) This moves the combine of fdiv by constant to fmul out of an 'if (Options.UnsafeFPMath || Flags.hasAllowReciprocal())' block, so that it triggers if the divide is exact. An extra check for Recip.isDenormal() is added, as multiple places make reference to denormal values being unsafe or slow on certain platforms.
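As a minimal sketch of the fold (illustrative IR, not taken from the patch): dividing by a constant whose reciprocal is exactly representable can now become a multiply with no fast-math flags at all, while an inexact reciprocal still requires arcp or unsafe-fp-math:

define float @div_exact(float %x) {
  %r = fdiv float %x, 4.0   ; 1/4.0 is exact and not denormal -> fmul float %x, 2.5e-01
  ret float %r
}

define float @div_inexact(float %x) {
  %r = fdiv float %x, 3.0   ; 1/3.0 is inexact -> stays fdiv unless arcp is present
  ret float %r
}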
--- llvm/include/llvm/ADT/APFloat.h | 7 ++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 41 ++++--- llvm/test/CodeGen/AArch64/fcvt-fixed.ll | 32 ++--- llvm/test/CodeGen/AArch64/fdiv-const.ll | 23 ++-- llvm/test/CodeGen/AArch64/frem-power2.ll | 135 ++++++++++++--------- llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll | 4 +- llvm/test/CodeGen/ARM/frem-power2.ll | 52 ++++---- llvm/test/CodeGen/ARM/vdiv_combine.ll | 111 +++-------------- llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll | 24 ++-- llvm/test/CodeGen/X86/change-unsafe-fp-math.ll | 10 +- 10 files changed, 200 insertions(+), 239 deletions(-) diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index 44a301e..78faadb 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -964,6 +964,13 @@ public: return Val; } + /// Factory for Positive and Negative One. + /// + /// \param Negative True iff the number should be negative. + static APFloat getOne(const fltSemantics &Sem, bool Negative = false) { + return APFloat(Sem, Negative ? -1 : 1); + } + /// Factory for Positive and Negative Infinity. /// /// \param Negative True iff the number should be negative. diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e3bd4ea..4fcbe08 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17262,26 +17262,29 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (SDValue V = combineRepeatedFPDivisors(N)) return V; - if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) { - // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. - if (auto *N1CFP = dyn_cast(N1)) { - // Compute the reciprocal 1.0 / c2. - const APFloat &N1APF = N1CFP->getValueAPF(); - APFloat Recip(N1APF.getSemantics(), 1); // 1.0 - APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); - // Only do the transform if the reciprocal is a legal fp immediate that - // isn't too nasty (eg NaN, denormal, ...). - if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty - (!LegalOperations || - // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM - // backend)... we should handle this gracefully after Legalize. - // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) || - TLI.isOperationLegal(ISD::ConstantFP, VT) || - TLI.isFPImmLegal(Recip, VT, ForCodeSize))) - return DAG.getNode(ISD::FMUL, DL, VT, N0, - DAG.getConstantFP(Recip, DL, VT)); - } + // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or + // the loss is acceptable with AllowReciprocal. + if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) { + // Compute the reciprocal 1.0 / c2. + const APFloat &N1APF = N1CFP->getValueAPF(); + APFloat Recip = APFloat::getOne(N1APF.getSemantics()); + APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); + // Only do the transform if the reciprocal is a legal fp immediate that + // isn't too nasty (eg NaN, denormal, ...). + if (((st == APFloat::opOK && !Recip.isDenormal()) || + (st == APFloat::opInexact && + (Options.UnsafeFPMath || Flags.hasAllowReciprocal()))) && + (!LegalOperations || + // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM + // backend)... we should handle this gracefully after Legalize. 
+ // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) || + TLI.isOperationLegal(ISD::ConstantFP, VT) || + TLI.isFPImmLegal(Recip, VT, ForCodeSize))) + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getConstantFP(Recip, DL, VT)); + } + if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) { // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. if (N1.getOpcode() == ISD::FSQRT) { diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll index 296be83..7056a4d 100644 --- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll +++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll @@ -412,10 +412,10 @@ define half @scvtf_f16_i32_7(i32 %int) { ; CHECK-NO16-LABEL: scvtf_f16_i32_7: ; CHECK-NO16: // %bb.0: ; CHECK-NO16-NEXT: scvtf s1, w0 -; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24 +; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24 ; CHECK-NO16-NEXT: fcvt h1, s1 ; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fdiv s0, s1, s0 +; CHECK-NO16-NEXT: fmul s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -432,10 +432,10 @@ define half @scvtf_f16_i32_15(i32 %int) { ; CHECK-NO16-LABEL: scvtf_f16_i32_15: ; CHECK-NO16: // %bb.0: ; CHECK-NO16-NEXT: scvtf s1, w0 -; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24 +; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24 ; CHECK-NO16-NEXT: fcvt h1, s1 ; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fdiv s0, s1, s0 +; CHECK-NO16-NEXT: fmul s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -452,10 +452,10 @@ define half @scvtf_f16_i64_7(i64 %long) { ; CHECK-NO16-LABEL: scvtf_f16_i64_7: ; CHECK-NO16: // %bb.0: ; CHECK-NO16-NEXT: scvtf s1, x0 -; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24 +; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24 ; CHECK-NO16-NEXT: fcvt h1, s1 ; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fdiv s0, s1, s0 +; CHECK-NO16-NEXT: fmul s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -472,10 +472,10 @@ define half @scvtf_f16_i64_15(i64 %long) { ; CHECK-NO16-LABEL: scvtf_f16_i64_15: ; CHECK-NO16: // %bb.0: ; CHECK-NO16-NEXT: scvtf s1, x0 -; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24 +; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24 ; CHECK-NO16-NEXT: fcvt h1, s1 ; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fdiv s0, s1, s0 +; CHECK-NO16-NEXT: fmul s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -574,10 +574,10 @@ define half @ucvtf_f16_i32_7(i32 %int) { ; CHECK-NO16-LABEL: ucvtf_f16_i32_7: ; CHECK-NO16: // %bb.0: ; CHECK-NO16-NEXT: ucvtf s1, w0 -; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24 +; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24 ; CHECK-NO16-NEXT: fcvt h1, s1 ; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fdiv s0, s1, s0 +; CHECK-NO16-NEXT: fmul s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -594,10 +594,10 @@ define half @ucvtf_f16_i32_15(i32 %int) { ; CHECK-NO16-LABEL: ucvtf_f16_i32_15: ; CHECK-NO16: // %bb.0: ; CHECK-NO16-NEXT: ucvtf s1, w0 -; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24 +; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24 ; CHECK-NO16-NEXT: fcvt h1, s1 ; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fdiv s0, s1, s0 +; CHECK-NO16-NEXT: fmul s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -614,10 +614,10 @@ define half @ucvtf_f16_i64_7(i64 %long) { ; CHECK-NO16-LABEL: ucvtf_f16_i64_7: ; CHECK-NO16: // %bb.0: ; CHECK-NO16-NEXT: ucvtf s1, x0 -; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24 +; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24 ; 
CHECK-NO16-NEXT: fcvt h1, s1 ; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fdiv s0, s1, s0 +; CHECK-NO16-NEXT: fmul s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -634,10 +634,10 @@ define half @ucvtf_f16_i64_15(i64 %long) { ; CHECK-NO16-LABEL: ucvtf_f16_i64_15: ; CHECK-NO16: // %bb.0: ; CHECK-NO16-NEXT: ucvtf s1, x0 -; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24 +; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24 ; CHECK-NO16-NEXT: fcvt h1, s1 ; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fdiv s0, s1, s0 +; CHECK-NO16-NEXT: fmul s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/fdiv-const.ll b/llvm/test/CodeGen/AArch64/fdiv-const.ll index 5a8f733..7aa89db 100644 --- a/llvm/test/CodeGen/AArch64/fdiv-const.ll +++ b/llvm/test/CodeGen/AArch64/fdiv-const.ll @@ -4,8 +4,8 @@ define float @divf32_2(float %a) nounwind { ; CHECK-LABEL: divf32_2: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov s1, #2.00000000 -; CHECK-NEXT: fdiv s0, s0, s1 +; CHECK-NEXT: fmov s1, #0.50000000 +; CHECK-NEXT: fmul s0, s0, s1 ; CHECK-NEXT: ret %r = fdiv float %a, 2.0 ret float %r @@ -46,8 +46,8 @@ define float @divf32_p75_arcp(float %a) nounwind { define half @divf16_2(half %a) nounwind { ; CHECK-LABEL: divf16_2: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov h1, #2.00000000 -; CHECK-NEXT: fdiv h0, h0, h1 +; CHECK-NEXT: fmov h1, #0.50000000 +; CHECK-NEXT: fmul h0, h0, h1 ; CHECK-NEXT: ret %r = fdiv half %a, 2.0 ret half %r @@ -67,9 +67,9 @@ define half @divf16_32768(half %a) nounwind { define half @divf16_32768_arcp(half %a) nounwind { ; CHECK-LABEL: divf16_32768_arcp: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #512 // =0x200 +; CHECK-NEXT: mov w8, #30720 // =0x7800 ; CHECK-NEXT: fmov h1, w8 -; CHECK-NEXT: fmul h0, h0, h1 +; CHECK-NEXT: fdiv h0, h0, h1 ; CHECK-NEXT: ret %r = fdiv arcp half %a, 32768.0 ret half %r @@ -78,8 +78,8 @@ define half @divf16_32768_arcp(half %a) nounwind { define double @divf64_2(double %a) nounwind { ; CHECK-LABEL: divf64_2: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d1, #2.00000000 -; CHECK-NEXT: fdiv d0, d0, d1 +; CHECK-NEXT: fmov d1, #0.50000000 +; CHECK-NEXT: fmul d0, d0, d1 ; CHECK-NEXT: ret %r = fdiv double %a, 2.0 ret double %r @@ -88,8 +88,8 @@ define double @divf64_2(double %a) nounwind { define <4 x float> @divv4f32_2(<4 x float> %a) nounwind { ; CHECK-LABEL: divv4f32_2: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #64, lsl #24 -; CHECK-NEXT: fdiv v0.4s, v0.4s, v1.4s +; CHECK-NEXT: movi v1.4s, #63, lsl #24 +; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %r = fdiv <4 x float> %a, ret <4 x float> %r @@ -141,9 +141,8 @@ define <4 x float> @divv4f32_24816(<4 x float> %a) nounwind { define @divnxv4f32_2( %a) nounwind { ; CHECK-LABEL: divnxv4f32_2: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov z1.s, #2.00000000 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: fmul z0.s, p0/m, z0.s, #0.5 ; CHECK-NEXT: ret %r = fdiv %a, splat (float 2.0) ret %r diff --git a/llvm/test/CodeGen/AArch64/frem-power2.ll b/llvm/test/CodeGen/AArch64/frem-power2.ll index 4192745..98276b6 100644 --- a/llvm/test/CodeGen/AArch64/frem-power2.ll +++ b/llvm/test/CodeGen/AArch64/frem-power2.ll @@ -5,11 +5,12 @@ define float @frem2(float %x) { ; CHECK-SD-LABEL: frem2: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fmov s1, #2.00000000 +; CHECK-SD-NEXT: fmov s1, #0.50000000 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s1, s2, s1, s0 
+; CHECK-SD-NEXT: fmov s2, #-2.00000000 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s1, s1, s2, s0 ; CHECK-SD-NEXT: mvni v2.4s, #128, lsl #24 ; CHECK-SD-NEXT: bit v0.16b, v1.16b, v2.16b ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 @@ -27,10 +28,11 @@ entry: define float @frem2_nsz(float %x) { ; CHECK-SD-LABEL: frem2_nsz: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fmov s1, #2.00000000 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s0, s2, s1, s0 +; CHECK-SD-NEXT: fmov s1, #0.50000000 +; CHECK-SD-NEXT: fmov s2, #-2.00000000 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem2_nsz: @@ -65,10 +67,11 @@ define float @frem2_abs(float %x) { ; CHECK-SD-LABEL: frem2_abs: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fabs s0, s0 -; CHECK-SD-NEXT: fmov s1, #2.00000000 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s0, s2, s1, s0 +; CHECK-SD-NEXT: fmov s1, #0.50000000 +; CHECK-SD-NEXT: fmov s2, #-2.00000000 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem2_abs: @@ -85,9 +88,9 @@ entry: define half @hrem2_nsz(half %x) { ; CHECK-SD-LABEL: hrem2_nsz: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fmov h1, #2.00000000 +; CHECK-SD-NEXT: fmov h1, #0.50000000 ; CHECK-SD-NEXT: fmov h2, #-2.00000000 -; CHECK-SD-NEXT: fdiv h1, h0, h1 +; CHECK-SD-NEXT: fmul h1, h0, h1 ; CHECK-SD-NEXT: frintz h1, h1 ; CHECK-SD-NEXT: fmadd h0, h1, h2, h0 ; CHECK-SD-NEXT: ret @@ -112,10 +115,11 @@ entry: define double @drem2_nsz(double %x) { ; CHECK-SD-LABEL: drem2_nsz: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fmov d1, #2.00000000 -; CHECK-SD-NEXT: fdiv d2, d0, d1 -; CHECK-SD-NEXT: frintz d2, d2 -; CHECK-SD-NEXT: fmsub d0, d2, d1, d0 +; CHECK-SD-NEXT: fmov d1, #0.50000000 +; CHECK-SD-NEXT: fmov d2, #-2.00000000 +; CHECK-SD-NEXT: fmul d1, d0, d1 +; CHECK-SD-NEXT: frintz d1, d1 +; CHECK-SD-NEXT: fmadd d0, d1, d2, d0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: drem2_nsz: @@ -176,10 +180,11 @@ entry: define float @fremm2_nsz(float %x) { ; CHECK-SD-LABEL: fremm2_nsz: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fmov s1, #-2.00000000 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s0, s2, s1, s0 +; CHECK-SD-NEXT: fmov s1, #-0.50000000 +; CHECK-SD-NEXT: fmov s2, #2.00000000 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fremm2_nsz: @@ -195,10 +200,11 @@ define float @frem4_abs(float %x) { ; CHECK-SD-LABEL: frem4_abs: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fabs s0, s0 -; CHECK-SD-NEXT: fmov s1, #4.00000000 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s0, s2, s1, s0 +; CHECK-SD-NEXT: fmov s1, #0.25000000 +; CHECK-SD-NEXT: fmov s2, #-4.00000000 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem4_abs: @@ -216,10 +222,12 @@ define float @frem16_abs(float %x) { ; CHECK-SD-LABEL: frem16_abs: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fabs s0, s0 -; CHECK-SD-NEXT: fmov s1, #16.00000000 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s0, s2, s1, s0 
+; CHECK-SD-NEXT: mov w8, #1031798784 // =0x3d800000 +; CHECK-SD-NEXT: fmov s2, #-16.00000000 +; CHECK-SD-NEXT: fmov s1, w8 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem16_abs: @@ -237,11 +245,13 @@ define float @frem4294967296_abs(float %x) { ; CHECK-SD-LABEL: frem4294967296_abs: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fabs s0, s0 -; CHECK-SD-NEXT: mov w8, #1333788672 // =0x4f800000 +; CHECK-SD-NEXT: mov w8, #796917760 // =0x2f800000 ; CHECK-SD-NEXT: fmov s1, w8 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s0, s2, s1, s0 +; CHECK-SD-NEXT: mov w8, #-813694976 // =0xcf800000 +; CHECK-SD-NEXT: fmov s2, w8 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem4294967296_abs: @@ -260,11 +270,13 @@ define float @frem1152921504606846976_abs(float %x) { ; CHECK-SD-LABEL: frem1152921504606846976_abs: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fabs s0, s0 -; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000 +; CHECK-SD-NEXT: mov w8, #562036736 // =0x21800000 ; CHECK-SD-NEXT: fmov s1, w8 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s0, s2, s1, s0 +; CHECK-SD-NEXT: mov w8, #-578813952 // =0xdd800000 +; CHECK-SD-NEXT: fmov s2, w8 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem1152921504606846976_abs: @@ -283,11 +295,13 @@ define float @frem4611686018427387904_abs(float %x) { ; CHECK-SD-LABEL: frem4611686018427387904_abs: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fabs s0, s0 -; CHECK-SD-NEXT: mov w8, #1585446912 // =0x5e800000 +; CHECK-SD-NEXT: mov w8, #545259520 // =0x20800000 ; CHECK-SD-NEXT: fmov s1, w8 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s0, s2, s1, s0 +; CHECK-SD-NEXT: mov w8, #-562036736 // =0xde800000 +; CHECK-SD-NEXT: fmov s2, w8 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem4611686018427387904_abs: @@ -305,11 +319,12 @@ entry: define float @frem9223372036854775808_abs(float %x) { ; CHECK-SD-LABEL: frem9223372036854775808_abs: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v1.2s, #95, lsl #24 +; CHECK-SD-NEXT: movi v1.2s, #32, lsl #24 ; CHECK-SD-NEXT: fabs s0, s0 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s0, s2, s1, s0 +; CHECK-SD-NEXT: movi v2.2s, #223, lsl #24 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem9223372036854775808_abs: @@ -326,11 +341,12 @@ entry: define <4 x float> @frem2_vec(<4 x float> %x) { ; CHECK-SD-LABEL: frem2_vec: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v1.4s, #64, lsl #24 +; CHECK-SD-NEXT: movi v1.4s, #63, lsl #24 +; CHECK-SD-NEXT: movi v2.4s, #64, lsl #24 ; CHECK-SD-NEXT: mov v3.16b, v0.16b -; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: frintz v2.4s, v2.4s -; CHECK-SD-NEXT: fmls v3.4s, v1.4s, v2.4s +; CHECK-SD-NEXT: fmul v1.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: frintz v1.4s, v1.4s +; CHECK-SD-NEXT: fmls v3.4s, v2.4s, v1.4s ; CHECK-SD-NEXT: mvni v1.4s, #128, lsl #24 ; CHECK-SD-NEXT: bit v0.16b, v3.16b, v1.16b ; CHECK-SD-NEXT: 
ret @@ -387,10 +403,11 @@ entry: define <4 x float> @frem2_nsz_vec(<4 x float> %x) { ; CHECK-SD-LABEL: frem2_nsz_vec: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v1.4s, #64, lsl #24 -; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: frintz v2.4s, v2.4s -; CHECK-SD-NEXT: fmls v0.4s, v1.4s, v2.4s +; CHECK-SD-NEXT: movi v1.4s, #63, lsl #24 +; CHECK-SD-NEXT: movi v2.4s, #64, lsl #24 +; CHECK-SD-NEXT: fmul v1.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: frintz v1.4s, v1.4s +; CHECK-SD-NEXT: fmls v0.4s, v2.4s, v1.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem2_nsz_vec: @@ -445,12 +462,14 @@ entry: define <4 x float> @frem1152921504606846976_absv(<4 x float> %x) { ; CHECK-SD-LABEL: frem1152921504606846976_absv: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000 +; CHECK-SD-NEXT: mov w8, #562036736 // =0x21800000 ; CHECK-SD-NEXT: fabs v0.4s, v0.4s ; CHECK-SD-NEXT: dup v1.4s, w8 -; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: frintz v2.4s, v2.4s -; CHECK-SD-NEXT: fmls v0.4s, v1.4s, v2.4s +; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000 +; CHECK-SD-NEXT: dup v2.4s, w8 +; CHECK-SD-NEXT: fmul v1.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: frintz v1.4s, v1.4s +; CHECK-SD-NEXT: fmls v0.4s, v2.4s, v1.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem1152921504606846976_absv: diff --git a/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll b/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll index 5386ef4..64d4a0c 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll @@ -17,7 +17,7 @@ define amdgpu_ps float @uniform_phi_with_undef(float inreg %c, float %v, i32 %x, ; GCN-NEXT: s_mov_b32 exec_lo, s2 ; GCN-NEXT: s_cbranch_execz .LBB0_2 ; GCN-NEXT: ; %bb.1: ; %if -; GCN-NEXT: s_mov_b32 s2, 2.0 +; GCN-NEXT: s_mov_b32 s2, 0x40400000 ; GCN-NEXT: v_div_scale_f32 v1, s3, s2, s2, v0 ; GCN-NEXT: v_rcp_f32_e64 v2, v1 ; GCN-NEXT: s_mov_b32 s3, 1.0 @@ -39,7 +39,7 @@ entry: br i1 %cc, label %if, label %end if: - %v.if = fdiv float %v, 2.0 + %v.if = fdiv float %v, 3.0 br label %end end: diff --git a/llvm/test/CodeGen/ARM/frem-power2.ll b/llvm/test/CodeGen/ARM/frem-power2.ll index 71c2c09..63ecd9f 100644 --- a/llvm/test/CodeGen/ARM/frem-power2.ll +++ b/llvm/test/CodeGen/ARM/frem-power2.ll @@ -14,26 +14,28 @@ define float @frem4(float %x) { ; ; CHECK-FP-LABEL: frem4: ; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov.f32 s0, #4.000000e+00 -; CHECK-FP-NEXT: vmov s2, r0 +; CHECK-FP-NEXT: vmov.f32 s0, #2.500000e-01 +; CHECK-FP-NEXT: vmov.f32 s2, #-4.000000e+00 +; CHECK-FP-NEXT: vmov s4, r0 ; CHECK-FP-NEXT: lsrs r0, r0, #31 -; CHECK-FP-NEXT: vdiv.f32 s4, s2, s0 -; CHECK-FP-NEXT: vrintz.f32 s4, s4 -; CHECK-FP-NEXT: vfms.f32 s2, s4, s0 -; CHECK-FP-NEXT: vmov r1, s2 +; CHECK-FP-NEXT: vmul.f32 s0, s4, s0 +; CHECK-FP-NEXT: vrintz.f32 s0, s0 +; CHECK-FP-NEXT: vfma.f32 s4, s0, s2 +; CHECK-FP-NEXT: vmov r1, s4 ; CHECK-FP-NEXT: bfi r1, r0, #31, #1 ; CHECK-FP-NEXT: mov r0, r1 ; CHECK-FP-NEXT: bx lr ; ; CHECK-M33-LABEL: frem4: ; CHECK-M33: @ %bb.0: @ %entry -; CHECK-M33-NEXT: vmov.f32 s0, #4.000000e+00 -; CHECK-M33-NEXT: vmov s2, r0 +; CHECK-M33-NEXT: vmov.f32 s0, #2.500000e-01 +; CHECK-M33-NEXT: vmov.f32 s2, #-4.000000e+00 +; CHECK-M33-NEXT: vmov s4, r0 ; CHECK-M33-NEXT: lsrs r0, r0, #31 -; CHECK-M33-NEXT: vdiv.f32 s4, s2, s0 -; CHECK-M33-NEXT: vrintz.f32 s4, s4 -; CHECK-M33-NEXT: vmls.f32 s2, s4, s0 -; CHECK-M33-NEXT: vmov r1, s2 +; CHECK-M33-NEXT: vmul.f32 s0, s4, s0 +; CHECK-M33-NEXT: vrintz.f32 s0, s0 +; 
CHECK-M33-NEXT: vmla.f32 s4, s0, s2 +; CHECK-M33-NEXT: vmov r1, s4 ; CHECK-M33-NEXT: bfi r1, r0, #31, #1 ; CHECK-M33-NEXT: mov r0, r1 ; CHECK-M33-NEXT: bx lr @@ -53,22 +55,24 @@ define float @frem4_nsz(float %x) { ; ; CHECK-FP-LABEL: frem4_nsz: ; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov.f32 s0, #4.000000e+00 -; CHECK-FP-NEXT: vmov s2, r0 -; CHECK-FP-NEXT: vdiv.f32 s4, s2, s0 -; CHECK-FP-NEXT: vrintz.f32 s4, s4 -; CHECK-FP-NEXT: vfms.f32 s2, s4, s0 -; CHECK-FP-NEXT: vmov r0, s2 +; CHECK-FP-NEXT: vmov.f32 s0, #2.500000e-01 +; CHECK-FP-NEXT: vmov.f32 s2, #-4.000000e+00 +; CHECK-FP-NEXT: vmov s4, r0 +; CHECK-FP-NEXT: vmul.f32 s0, s4, s0 +; CHECK-FP-NEXT: vrintz.f32 s0, s0 +; CHECK-FP-NEXT: vfma.f32 s4, s0, s2 +; CHECK-FP-NEXT: vmov r0, s4 ; CHECK-FP-NEXT: bx lr ; ; CHECK-M33-LABEL: frem4_nsz: ; CHECK-M33: @ %bb.0: @ %entry -; CHECK-M33-NEXT: vmov.f32 s0, #4.000000e+00 -; CHECK-M33-NEXT: vmov s2, r0 -; CHECK-M33-NEXT: vdiv.f32 s4, s2, s0 -; CHECK-M33-NEXT: vrintz.f32 s4, s4 -; CHECK-M33-NEXT: vmls.f32 s2, s4, s0 -; CHECK-M33-NEXT: vmov r0, s2 +; CHECK-M33-NEXT: vmov.f32 s0, #2.500000e-01 +; CHECK-M33-NEXT: vmov.f32 s2, #-4.000000e+00 +; CHECK-M33-NEXT: vmov s4, r0 +; CHECK-M33-NEXT: vmul.f32 s0, s4, s0 +; CHECK-M33-NEXT: vrintz.f32 s0, s0 +; CHECK-M33-NEXT: vmla.f32 s4, s0, s2 +; CHECK-M33-NEXT: vmov r0, s4 ; CHECK-M33-NEXT: bx lr entry: %fmod = frem nsz float %x, 4.0 diff --git a/llvm/test/CodeGen/ARM/vdiv_combine.ll b/llvm/test/CodeGen/ARM/vdiv_combine.ll index 9888446..899487f 100644 --- a/llvm/test/CodeGen/ARM/vdiv_combine.ll +++ b/llvm/test/CodeGen/ARM/vdiv_combine.ll @@ -5,10 +5,7 @@ define arm_aapcs_vfpcc <2 x float> @t1(<2 x i32> %vecinit2.i) nounwind { ; CHECK-LABEL: t1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s2, #8.000000e+00 -; CHECK-NEXT: vcvt.f32.s32 d2, d0 -; CHECK-NEXT: vdiv.f32 s1, s5, s2 -; CHECK-NEXT: vdiv.f32 s0, s4, s2 +; CHECK-NEXT: vcvt.f32.s32 d0, d0, #3 ; CHECK-NEXT: bx lr entry: %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> @@ -20,10 +17,7 @@ entry: define arm_aapcs_vfpcc <2 x float> @t2(<2 x i32> %vecinit2.i) nounwind { ; CHECK-LABEL: t2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s2, #8.000000e+00 -; CHECK-NEXT: vcvt.f32.u32 d2, d0 -; CHECK-NEXT: vdiv.f32 s1, s5, s2 -; CHECK-NEXT: vdiv.f32 s0, s4, s2 +; CHECK-NEXT: vcvt.f32.u32 d0, d0, #3 ; CHECK-NEXT: bx lr entry: %vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float> @@ -56,17 +50,10 @@ entry: define arm_aapcs_vfpcc <2 x float> @t4(<2 x i32> %vecinit2.i) nounwind { ; CHECK-LABEL: t4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcvt.f32.s32 d2, d0 -; CHECK-NEXT: vldr s2, LCPI3_0 -; CHECK-NEXT: vdiv.f32 s1, s5, s2 -; CHECK-NEXT: vdiv.f32 s0, s4, s2 +; CHECK-NEXT: vcvt.f32.s32 d16, d0 +; CHECK-NEXT: vmov.i32 d17, #0x2f000000 +; CHECK-NEXT: vmul.f32 d0, d16, d17 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .data_region -; CHECK-NEXT: LCPI3_0: -; CHECK-NEXT: .long 0x50000000 @ float 8.58993459E+9 -; CHECK-NEXT: .end_data_region entry: %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> %div.i = fdiv <2 x float> %vcvt.i, @@ -77,17 +64,8 @@ entry: define arm_aapcs_vfpcc <2 x float> @t5(<2 x i32> %vecinit2.i) nounwind { ; CHECK-LABEL: t5: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcvt.f32.s32 d2, d0 -; CHECK-NEXT: vldr s2, LCPI4_0 -; CHECK-NEXT: vdiv.f32 s1, s5, s2 -; CHECK-NEXT: vdiv.f32 s0, s4, s2 +; CHECK-NEXT: vcvt.f32.s32 d0, d0, #32 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .data_region -; 
CHECK-NEXT: LCPI4_0: -; CHECK-NEXT: .long 0x4f800000 @ float 4.2949673E+9 -; CHECK-NEXT: .end_data_region entry: %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> %div.i = fdiv <2 x float> %vcvt.i, @@ -98,12 +76,7 @@ entry: define arm_aapcs_vfpcc <4 x float> @t6(<4 x i32> %vecinit6.i) nounwind { ; CHECK-LABEL: t6: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s4, #8.000000e+00 -; CHECK-NEXT: vcvt.f32.s32 q2, q0 -; CHECK-NEXT: vdiv.f32 s3, s11, s4 -; CHECK-NEXT: vdiv.f32 s2, s10, s4 -; CHECK-NEXT: vdiv.f32 s1, s9, s4 -; CHECK-NEXT: vdiv.f32 s0, s8, s4 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #3 ; CHECK-NEXT: bx lr entry: %vcvt.i = sitofp <4 x i32> %vecinit6.i to <4 x float> @@ -115,12 +88,7 @@ define arm_aapcs_vfpcc <4 x float> @fix_unsigned_i16_to_float(<4 x i16> %in) { ; CHECK-LABEL: fix_unsigned_i16_to_float: ; CHECK: @ %bb.0: ; CHECK-NEXT: vmovl.u16 q8, d0 -; CHECK-NEXT: vmov.f32 s4, #2.000000e+00 -; CHECK-NEXT: vcvt.f32.u32 q2, q8 -; CHECK-NEXT: vdiv.f32 s3, s11, s4 -; CHECK-NEXT: vdiv.f32 s2, s10, s4 -; CHECK-NEXT: vdiv.f32 s1, s9, s4 -; CHECK-NEXT: vdiv.f32 s0, s8, s4 +; CHECK-NEXT: vcvt.f32.u32 q0, q8, #1 ; CHECK-NEXT: bx lr %conv = uitofp <4 x i16> %in to <4 x float> %shift = fdiv <4 x float> %conv, @@ -131,12 +99,7 @@ define arm_aapcs_vfpcc <4 x float> @fix_signed_i16_to_float(<4 x i16> %in) { ; CHECK-LABEL: fix_signed_i16_to_float: ; CHECK: @ %bb.0: ; CHECK-NEXT: vmovl.s16 q8, d0 -; CHECK-NEXT: vmov.f32 s4, #2.000000e+00 -; CHECK-NEXT: vcvt.f32.s32 q2, q8 -; CHECK-NEXT: vdiv.f32 s3, s11, s4 -; CHECK-NEXT: vdiv.f32 s2, s10, s4 -; CHECK-NEXT: vdiv.f32 s1, s9, s4 -; CHECK-NEXT: vdiv.f32 s0, s8, s4 +; CHECK-NEXT: vcvt.f32.s32 q0, q8, #1 ; CHECK-NEXT: bx lr %conv = sitofp <4 x i16> %in to <4 x float> %shift = fdiv <4 x float> %conv, @@ -152,13 +115,12 @@ define arm_aapcs_vfpcc <2 x float> @fix_i64_to_float(<2 x i64> %in) { ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl ___floatundisf ; CHECK-NEXT: vmov r2, r1, d8 -; CHECK-NEXT: vmov s18, r0 -; CHECK-NEXT: vmov.f32 s16, #2.000000e+00 +; CHECK-NEXT: vmov s19, r0 +; CHECK-NEXT: vmov.i32 d8, #0x3f000000 ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: bl ___floatundisf -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vdiv.f32 s1, s18, s16 -; CHECK-NEXT: vdiv.f32 s0, s2, s16 +; CHECK-NEXT: vmov s18, r0 +; CHECK-NEXT: vmul.f32 d0, d9, d8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {lr} ; CHECK-NEXT: bx lr @@ -177,13 +139,13 @@ define arm_aapcs_vfpcc <2 x double> @fix_i64_to_double(<2 x i64> %in) { ; CHECK-NEXT: bl ___floatundidf ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 -; CHECK-NEXT: vmov.f64 d8, #2.000000e+00 +; CHECK-NEXT: vmov.f64 d8, #5.000000e-01 ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: mov r1, r3 ; CHECK-NEXT: bl ___floatundidf ; CHECK-NEXT: vmov d16, r0, r1 -; CHECK-NEXT: vdiv.f64 d1, d9, d8 -; CHECK-NEXT: vdiv.f64 d0, d16, d8 +; CHECK-NEXT: vmul.f64 d1, d9, d8 +; CHECK-NEXT: vmul.f64 d0, d16, d8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {lr} ; CHECK-NEXT: bx lr @@ -196,19 +158,8 @@ define arm_aapcs_vfpcc <2 x double> @fix_i64_to_double(<2 x i64> %in) { define arm_aapcs_vfpcc <8 x float> @test7(<8 x i32> %in) nounwind { ; CHECK-LABEL: test7: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov.f32 s12, #8.000000e+00 -; CHECK-NEXT: vcvt.f32.s32 q4, q0 -; CHECK-NEXT: vcvt.f32.s32 q2, q1 -; CHECK-NEXT: vdiv.f32 s3, s19, s12 -; CHECK-NEXT: vdiv.f32 s7, s11, s12 -; CHECK-NEXT: vdiv.f32 s2, s18, s12 -; CHECK-NEXT: vdiv.f32 s6, s10, s12 -; CHECK-NEXT: vdiv.f32 s1, s17, s12 -; CHECK-NEXT: vdiv.f32 s5, s9, 
s12 -; CHECK-NEXT: vdiv.f32 s0, s16, s12 -; CHECK-NEXT: vdiv.f32 s4, s8, s12 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #3 +; CHECK-NEXT: vcvt.f32.s32 q1, q1, #3 ; CHECK-NEXT: bx lr entry: %vcvt.i = sitofp <8 x i32> %in to <8 x float> @@ -220,19 +171,8 @@ entry: define arm_aapcs_vfpcc <4 x float> @test8(<4 x i32> %in) { ; CHECK-LABEL: test8: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.f32 s4, #2.000000e+00 -; CHECK-NEXT: vcvt.f32.s32 q2, q0 -; CHECK-NEXT: vdiv.f32 s2, s10, s4 -; CHECK-NEXT: vdiv.f32 s1, s9, s4 -; CHECK-NEXT: vdiv.f32 s0, s8, s4 -; CHECK-NEXT: vldr s3, LCPI11_0 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #1 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .data_region -; CHECK-NEXT: LCPI11_0: -; CHECK-NEXT: .long 0x7fc00000 @ float NaN -; CHECK-NEXT: .end_data_region %vcvt.i = sitofp <4 x i32> %in to <4 x float> %div.i = fdiv <4 x float> %vcvt.i, ret <4 x float> %div.i @@ -241,19 +181,8 @@ define arm_aapcs_vfpcc <4 x float> @test8(<4 x i32> %in) { define arm_aapcs_vfpcc <3 x float> @test_illegal_int_to_fp(<3 x i32> %in) { ; CHECK-LABEL: test_illegal_int_to_fp: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.f32 s4, #4.000000e+00 -; CHECK-NEXT: vcvt.f32.s32 q2, q0 -; CHECK-NEXT: vdiv.f32 s2, s10, s4 -; CHECK-NEXT: vdiv.f32 s1, s9, s4 -; CHECK-NEXT: vdiv.f32 s0, s8, s4 -; CHECK-NEXT: vldr s3, LCPI12_0 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #2 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .data_region -; CHECK-NEXT: LCPI12_0: -; CHECK-NEXT: .long 0x7fc00000 @ float NaN -; CHECK-NEXT: .end_data_region %conv = sitofp <3 x i32> %in to <3 x float> %res = fdiv <3 x float> %conv, ret <3 x float> %res diff --git a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll index 47ac884..ac65a11 100644 --- a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll +++ b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll @@ -1367,7 +1367,7 @@ define void @bcast_unfold_fdiv_v16f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 -; CHECK-NEXT: vbroadcastss {{.*#+}} zmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0] +; CHECK-NEXT: vbroadcastss {{.*#+}} zmm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB42_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1386,7 +1386,7 @@ bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp %tmp4 = load <16 x float>, ptr %tmp2, align 4 - %tmp5 = fdiv <16 x float> %tmp4, + %tmp5 = fdiv <16 x float> %tmp4, store <16 x float> %tmp5, ptr %tmp2, align 4 %tmp7 = add i64 %tmp, 16 %tmp8 = icmp eq i64 %tmp7, 1024 @@ -1400,7 +1400,7 @@ define void @bcast_unfold_fdiv_v8f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 -; CHECK-NEXT: vbroadcastss {{.*#+}} ymm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0] +; CHECK-NEXT: vbroadcastss {{.*#+}} ymm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB43_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1419,7 +1419,7 @@ bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] %tmp2 = 
getelementptr inbounds float, ptr %arg, i64 %tmp %tmp4 = load <8 x float>, ptr %tmp2, align 4 - %tmp5 = fdiv <8 x float> %tmp4, + %tmp5 = fdiv <8 x float> %tmp4, store <8 x float> %tmp5, ptr %tmp2, align 4 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 @@ -1433,7 +1433,7 @@ define void @bcast_unfold_fdiv_v4f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 -; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0] +; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB44_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1451,7 +1451,7 @@ bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp %tmp4 = load <4 x float>, ptr %tmp2, align 4 - %tmp5 = fdiv <4 x float> %tmp4, + %tmp5 = fdiv <4 x float> %tmp4, store <4 x float> %tmp5, ptr %tmp2, align 4 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 @@ -1465,7 +1465,7 @@ define void @bcast_unfold_fdiv_v8f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vbroadcastsd {{.*#+}} zmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0] +; CHECK-NEXT: vbroadcastsd {{.*#+}} zmm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB45_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1484,7 +1484,7 @@ bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp %tmp4 = load <8 x double>, ptr %tmp2, align 8 - %tmp5 = fdiv <8 x double> %tmp4, + %tmp5 = fdiv <8 x double> %tmp4, store <8 x double> %tmp5, ptr %tmp2, align 8 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 @@ -1498,7 +1498,7 @@ define void @bcast_unfold_fdiv_v4f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0] +; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB46_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1517,7 +1517,7 @@ bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp %tmp4 = load <4 x double>, ptr %tmp2, align 8 - %tmp5 = fdiv <4 x double> %tmp4, + %tmp5 = fdiv <4 x double> %tmp4, store <4 x double> %tmp5, ptr %tmp2, align 8 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 @@ -1531,7 +1531,7 @@ define void @bcast_unfold_fdiv_v2f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [3.0E+0,3.0E+0] ; CHECK-NEXT: # xmm0 = mem[0,0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB47_1: # %bb1 @@ -1550,7 +1550,7 @@ bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp %tmp4 = load <2 x double>, ptr %tmp2, align 8 - %tmp5 = fdiv <2 x double> %tmp4, + %tmp5 = fdiv <2 x double> %tmp4, store <2 x double> %tmp5, ptr %tmp2, align 8 %tmp7 = add i64 %tmp, 2 %tmp8 = icmp eq i64 %tmp7, 1024 diff --git 
a/llvm/test/CodeGen/X86/change-unsafe-fp-math.ll b/llvm/test/CodeGen/X86/change-unsafe-fp-math.ll index 33a7ec9..ba09ba8 100644 --- a/llvm/test/CodeGen/X86/change-unsafe-fp-math.ll +++ b/llvm/test/CodeGen/X86/change-unsafe-fp-math.ll @@ -14,7 +14,7 @@ define double @unsafe_fp_math_default0(double %x) { ; SAFE: divsd ; UNSAFE: mulsd - %div = fdiv double %x, 2.0 + %div = fdiv double %x, 3.0 ret double %div } @@ -22,7 +22,7 @@ define double @unsafe_fp_math_default0(double %x) { define double @unsafe_fp_math_off(double %x) #0 { ; SAFE: divsd ; UNSAFE: divsd - %div = fdiv double %x, 2.0 + %div = fdiv double %x, 3.0 ret double %div } @@ -31,7 +31,7 @@ define double @unsafe_fp_math_default1(double %x) { ; With unsafe math enabled, can change this div to a mul. ; SAFE: divsd ; UNSAFE: mulsd - %div = fdiv double %x, 2.0 + %div = fdiv double %x, 3.0 ret double %div } @@ -39,7 +39,7 @@ define double @unsafe_fp_math_default1(double %x) { define double @unsafe_fp_math_on(double %x) #1 { ; SAFE: mulsd ; UNSAFE: mulsd - %div = fdiv double %x, 2.0 + %div = fdiv double %x, 3.0 ret double %div } @@ -48,7 +48,7 @@ define double @unsafe_fp_math_default2(double %x) { ; With unsafe math enabled, can change this div to a mul. ; SAFE: divsd ; UNSAFE: mulsd - %div = fdiv double %x, 2.0 + %div = fdiv double %x, 3.0 ret double %div } -- cgit v1.1 From 2f4ebf85457e7246ffce312fb443b4ae78172f01 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 9 Jun 2024 13:19:08 +0100 Subject: [VPlan] Handle more cases in VPInstruction::onlyFirstPartUsed. Handle binary ops and a few other instructions in onlyFirstPartUsed; they only use the first part if they themselves only have their first part used. --- llvm/lib/Transforms/Vectorize/VPlan.h | 15 +-------------- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 19 +++++++++++++++++++ llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll | 1 - .../LoopVectorize/AArch64/scalable-strict-fadd.ll | 9 --------- .../LoopVectorize/AArch64/sve-tail-folding-unroll.ll | 6 ------ 5 files changed, 20 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 943edc3..5bb88e4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1325,20 +1325,7 @@ public: bool onlyFirstLaneUsed(const VPValue *Op) const override; /// Returns true if the recipe only uses the first part of operand \p Op. - bool onlyFirstPartUsed(const VPValue *Op) const override { - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - if (getOperand(0) != Op) - return false; - switch (getOpcode()) { - default: - return false; - case VPInstruction::BranchOnCount: - case VPInstruction::CanonicalIVIncrementForPart: - return true; - }; - llvm_unreachable("switch should return"); - } + bool onlyFirstPartUsed(const VPValue *Op) const override; /// Returns true if this VPInstruction produces a scalar value from a vector, /// e.g. by performing a reduction or extracting a lane. 
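As a rough standalone analogue of the idea (simplified and assumption-laden: real VPlan recipes form a use-def graph walked via vputils helpers, and more opcodes are involved than shown here), the query recurses through a value's users, bottoming out at opcodes that structurally consume only part 0:

#include <vector>

struct Recipe {
  enum Kind { BinaryOp, ICmp, Select, BranchOnCount, Other };
  Kind K;
  std::vector<const Recipe *> Users; // assumed acyclic for this sketch
};

// Sketch: a recipe only needs the first part of its operands if it is a
// branch-like consumer of part 0, or if all of its own users in turn only
// need its first part.
static bool onlyFirstPartUsed(const Recipe *R) {
  switch (R->K) {
  case Recipe::BranchOnCount:
    return true;
  case Recipe::BinaryOp:
  case Recipe::ICmp:
  case Recipe::Select:
    for (const Recipe *U : R->Users)
      if (!onlyFirstPartUsed(U))
        return false;
    return true;
  case Recipe::Other:
    return false;
  }
  return false;
}

The practical effect shows up in the tail-folding tests below: once only part 0 of the negated mask reaches the exit branch, the xors computed for the remaining parts become dead and are no longer emitted.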
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2093aab..7a48245 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -680,6 +680,25 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
   llvm_unreachable("switch should return");
 }
 
+bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const {
+  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
+  if (Instruction::isBinaryOp(getOpcode()))
+    return vputils::onlyFirstPartUsed(this);
+
+  switch (getOpcode()) {
+  default:
+    return false;
+  case Instruction::ICmp:
+  case Instruction::Select:
+    return vputils::onlyFirstPartUsed(this);
+  case VPInstruction::BranchOnCount:
+  case VPInstruction::BranchOnCond:
+  case VPInstruction::CanonicalIVIncrementForPart:
+    return true;
+  };
+  llvm_unreachable("switch should return");
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPInstruction::dump() const {
   VPSlotTracker SlotTracker(getParent()->getPlan());
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll
index 809d2e8..34d5db4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll
@@ -54,7 +54,6 @@ define i32 @pr70988() {
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = icmp ult i64 [[INDEX_NEXT]], [[UMAX]]
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT7]] = icmp ult i64 [[TMP19]], [[UMAX]]
 ; CHECK-NEXT: [[TMP20:%.*]] = xor i1 [[ACTIVE_LANE_MASK_NEXT]], true
-; CHECK-NEXT: [[TMP21:%.*]] = xor i1 [[ACTIVE_LANE_MASK_NEXT7]], true
 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP17]], i32 [[TMP18]])
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
index bcf8096..52d62b3 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -481,9 +481,6 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP76]], i64 [[TMP9]])
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP79]], i64 [[TMP9]])
 ; CHECK-ORDERED-TF-NEXT: [[TMP80:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-ORDERED-TF-NEXT: [[TMP81:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT12]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-ORDERED-TF-NEXT: [[TMP82:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT13]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-ORDERED-TF-NEXT: [[TMP83:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT14]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF-NEXT: [[TMP84:%.*]] = extractelement <vscale x 8 x i1> [[TMP80]], i32 0
 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP84]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK-ORDERED-TF: middle.block:
@@ -1799,9 +1796,6 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP94]], i64 [[TMP9]])
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP97]], i64 [[TMP9]])
 ; CHECK-ORDERED-TF-NEXT: [[TMP98:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-ORDERED-TF-NEXT: [[TMP99:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT16]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-ORDERED-TF-NEXT: [[TMP100:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT17]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-ORDERED-TF-NEXT: [[TMP101:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT18]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF-NEXT: [[TMP102:%.*]] = extractelement <vscale x 8 x i1> [[TMP98]], i32 0
 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP102]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK-ORDERED-TF: middle.block:
@@ -2198,9 +2192,6 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP94]], i64 [[TMP9]])
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP97]], i64 [[TMP9]])
 ; CHECK-ORDERED-TF-NEXT: [[TMP98:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-ORDERED-TF-NEXT: [[TMP99:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT16]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-ORDERED-TF-NEXT: [[TMP100:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT17]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-ORDERED-TF-NEXT: [[TMP101:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT18]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF-NEXT: [[TMP102:%.*]] = extractelement <vscale x 8 x i1> [[TMP98]], i32 0
 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP102]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; CHECK-ORDERED-TF: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll
index 2acc1dd..52a4b17 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll
@@ -96,9 +96,6 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT12]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP68]], i64 [[TMP9]])
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP71]], i64 [[TMP9]])
 ; CHECK-NEXT: [[TMP72:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP73:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT11]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP74:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT12]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP75:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT13]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:
[[TMP76:%.*]] = extractelement <vscale x 4 x i1> [[TMP72]], i32 0
 ; CHECK-NEXT: br i1 [[TMP76]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
@@ -249,9 +246,6 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias %
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT15]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP90]], i64 [[TMP9]])
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP93]], i64 [[TMP9]])
 ; CHECK-NEXT: [[TMP94:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP95:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT14]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP96:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT15]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP97:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT16]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT: [[TMP98:%.*]] = extractelement <vscale x 4 x i1> [[TMP94]], i32 0
 ; CHECK-NEXT: br i1 [[TMP98]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: middle.block:
-- 
cgit v1.1


From cb8e9360d821f4fb03d571940a7e09d750d2cf2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?=
Date: Sun, 9 Jun 2024 10:44:16 +0200
Subject: [clang][Interp] Implement ~ operator for complex values

---
 clang/lib/AST/Interp/ByteCodeExprGen.cpp | 12 ++++++++++++
 clang/test/AST/Interp/complex.cpp        |  2 ++
 2 files changed, 14 insertions(+)

diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index ff2b51e..5eecc692 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -3827,6 +3827,18 @@ bool ByteCodeExprGen<Emitter>::VisitComplexUnaryOperator(
     // we sometimes have to do the lvalue-to-rvalue conversion here manually.
     return this->emitArrayElemPop(classifyPrim(E->getType()), 1, E);
 
+  case UO_Not: // ~x
+    if (!this->visit(SubExpr))
+      return false;
+    // Negate the imaginary component.
+    if (!this->emitArrayElem(ElemT, 1, E))
+      return false;
+    if (!this->emitNeg(ElemT, E))
+      return false;
+    if (!this->emitInitElem(ElemT, 1, E))
+      return false;
+    return DiscardResult ?
this->emitPopPtr(E) : true;
+
   default:
     return this->emitInvalid(E);
   }
diff --git a/clang/test/AST/Interp/complex.cpp b/clang/test/AST/Interp/complex.cpp
index 09cb620..ea54df3 100644
--- a/clang/test/AST/Interp/complex.cpp
+++ b/clang/test/AST/Interp/complex.cpp
@@ -115,6 +115,8 @@ static_assert(__imag(Doubles[2]) == 0.0, "");
 static_assert(__real(Doubles[3]) == 0.0, "");
 static_assert(__imag(Doubles[3]) == 0.0, "");
 
+static_assert(~(0.5 + 1.5j) == (0.5 + -1.5j), "");
+
 void func(void) {
   __complex__ int arr;
   _Complex int result;
-- 
cgit v1.1


From 69cd2d288d465b01a120d7544e99061921c66c28 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?=
Date: Sun, 9 Jun 2024 13:40:36 +0200
Subject: [clang][Interp] Handle __extension__ for complex values

---
 clang/lib/AST/Interp/ByteCodeExprGen.cpp | 3 +++
 clang/test/AST/Interp/complex.cpp        | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 5eecc692..05921e8 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -3839,6 +3839,9 @@ bool ByteCodeExprGen<Emitter>::VisitComplexUnaryOperator(
       return false;
     return DiscardResult ? this->emitPopPtr(E) : true;
 
+  case UO_Extension:
+    return this->delegate(SubExpr);
+
   default:
     return this->emitInvalid(E);
   }
diff --git a/clang/test/AST/Interp/complex.cpp b/clang/test/AST/Interp/complex.cpp
index ea54df3..003f33e 100644
--- a/clang/test/AST/Interp/complex.cpp
+++ b/clang/test/AST/Interp/complex.cpp
@@ -117,6 +117,9 @@ static_assert(__imag(Doubles[3]) == 0.0, "");
 
 static_assert(~(0.5 + 1.5j) == (0.5 + -1.5j), "");
 
+static_assert(__extension__ __imag(A) == 0, "");
+static_assert(__imag(__extension__ A) == 0, "");
+
 void func(void) {
   __complex__ int arr;
   _Complex int result;
-- 
cgit v1.1


From 5bb9c08d8895e9d5122411c8612521e9a84220b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?=
Date: Sun, 9 Jun 2024 14:58:21 +0200
Subject: [clang][Interp] Reject compound assign operators pre-C++14

---
 clang/lib/AST/Interp/ByteCodeExprGen.cpp  | 3 +++
 clang/test/SemaCXX/for-range-examples.cpp | 1 +
 clang/test/SemaCXX/integer-overflow.cpp   | 4 ++++
 3 files changed, 8 insertions(+)

diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 05921e8..ac245e7 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -1834,6 +1834,9 @@ bool ByteCodeExprGen<Emitter>::VisitCompoundAssignOperator(
   std::optional<PrimType> RT = classify(RHS->getType());
   std::optional<PrimType> ResultT = classify(E->getType());
 
+  if (!Ctx.getLangOpts().CPlusPlus14)
+    return this->visit(RHS) && this->visit(LHS) && this->emitError(E);
+
   if (!LT || !RT || !ResultT || !LHSComputationT)
     return false;
 
diff --git a/clang/test/SemaCXX/for-range-examples.cpp b/clang/test/SemaCXX/for-range-examples.cpp
index d129d50..c06bf01 100644
--- a/clang/test/SemaCXX/for-range-examples.cpp
+++ b/clang/test/SemaCXX/for-range-examples.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -fsyntax-only -verify %s -std=c++11
+// RUN: %clang_cc1 -fsyntax-only -verify %s -std=c++11 -fexperimental-new-constant-interpreter
 
 namespace value_range_detail {
   template<typename T>
diff --git a/clang/test/SemaCXX/integer-overflow.cpp b/clang/test/SemaCXX/integer-overflow.cpp
index 6049458..d1cc8be 100644
--- a/clang/test/SemaCXX/integer-overflow.cpp
+++ b/clang/test/SemaCXX/integer-overflow.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 %s -verify -fsyntax-only -std=gnu++98 -triple x86_64-pc-linux-gnu
 // RUN: %clang_cc1 %s
-verify -fsyntax-only -std=gnu++2a -triple x86_64-pc-linux-gnu
+// RUN: %clang_cc1 %s -verify -fsyntax-only -std=gnu++98 -triple x86_64-pc-linux-gnu -fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 %s -verify -fsyntax-only -std=gnu++2a -triple x86_64-pc-linux-gnu -fexperimental-new-constant-interpreter
+
+
 typedef unsigned long long uint64_t;
 typedef unsigned int uint32_t;
-- 
cgit v1.1


From cc19374afa55b6ccafb07ef0cb6550f4222bf99f Mon Sep 17 00:00:00 2001
From: Andreas Jonson
Date: Sun, 9 Jun 2024 16:29:50 +0200
Subject: [AMDGPU] Swap range metadata to attribute for workitem id. (#94871)

Swap out range metadata for the range attribute on calls, so that range
metadata on calls can be deprecated in the future.
---
 llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp        | 14 +++-
 llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll | 21 ++---
 llvm/test/CodeGen/AMDGPU/private-memory-r600.ll   | 13 ++-
 .../promote-alloca-strip-abi-opt-attributes.ll    |  6 +-
 .../CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll  | 92 +++++++++++-----------
 5 files changed, 72 insertions(+), 74 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 94ee4ac..0751c8d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -560,10 +560,16 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
   else
     ++MaxSize;
 
-  MDBuilder MDB(I->getContext());
-  MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize),
-                                                  APInt(32, MaxSize));
-  I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
+  APInt Lower{32, MinSize};
+  APInt Upper{32, MaxSize};
+  if (auto *CI = dyn_cast<CallBase>(I)) {
+    ConstantRange Range(Lower, Upper);
+    CI->addRangeRetAttr(Range);
+  } else {
+    MDBuilder MDB(I->getContext());
+    MDNode *MaxWorkGroupSizeRange = MDB.createRange(Lower, Upper);
+    I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
+  }
   return true;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
index d6841d4..bd6155890 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
@@ -52,9 +52,9 @@
 ; HSAOPT: [[LDZU:%[0-9]+]] = load i32, ptr addrspace(4) [[GEP1]], align 4, !range !2, !invariant.load !1
 ; HSAOPT: [[EXTRACTY:%[0-9]+]] = lshr i32 [[LDXY]], 16
 
-; HSAOPT: [[WORKITEM_ID_X:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.x(), !range !3
-; HSAOPT: [[WORKITEM_ID_Y:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.y(), !range !3
-; HSAOPT: [[WORKITEM_ID_Z:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.z(), !range !3
+; HSAOPT: [[WORKITEM_ID_X:%[0-9]+]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.x()
+; HSAOPT: [[WORKITEM_ID_Y:%[0-9]+]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.y()
+; HSAOPT: [[WORKITEM_ID_Z:%[0-9]+]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.z()
 
 ; HSAOPT: [[Y_SIZE_X_Z_SIZE:%[0-9]+]] = mul nuw nsw i32 [[EXTRACTY]], [[LDZU]]
 ; HSAOPT: [[YZ_X_XID:%[0-9]+]] = mul i32 [[Y_SIZE_X_Z_SIZE]], [[WORKITEM_ID_X]]
@@ -68,11 +68,11 @@
 ; HSAOPT: %arrayidx12 = getelementptr inbounds [5 x i32], ptr addrspace(3) [[LOCAL_GEP]], i32 0, i32 1
 
-; NOHSAOPT: call i32 @llvm.r600.read.local.size.y(), !range !1
-; NOHSAOPT: call i32 @llvm.r600.read.local.size.z(), !range !1
-; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.x(), !range !2
-; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.y(), !range !2
-; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.z(), !range !2
+; NOHSAOPT: call
range(i32 0, 257) i32 @llvm.r600.read.local.size.y() +; NOHSAOPT: call range(i32 0, 257) i32 @llvm.r600.read.local.size.z() +; NOHSAOPT: call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.x() +; NOHSAOPT: call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.y() +; NOHSAOPT: call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.z() define amdgpu_kernel void @mova_same_clause(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #0 { entry: %stack = alloca [5 x i32], align 4, addrspace(5) @@ -533,8 +533,3 @@ attributes #1 = { nounwind "amdgpu-flat-work-group-size"="1,256" } !99 = !{i32 1, !"amdhsa_code_object_version", i32 400} ; HSAOPT: !1 = !{} -; HSAOPT: !2 = !{i32 0, i32 257} -; HSAOPT: !3 = !{i32 0, i32 256} - -; NOHSAOPT: !1 = !{i32 0, i32 257} -; NOHSAOPT: !2 = !{i32 0, i32 256} diff --git a/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll b/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll index 462ab38..1f7de73 100644 --- a/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll +++ b/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll @@ -12,11 +12,11 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; R600: LDS_READ ; R600: LDS_READ -; OPT: call i32 @llvm.r600.read.local.size.y(), !range !0 -; OPT: call i32 @llvm.r600.read.local.size.z(), !range !0 -; OPT: call i32 @llvm.r600.read.tidig.x(), !range !1 -; OPT: call i32 @llvm.r600.read.tidig.y(), !range !1 -; OPT: call i32 @llvm.r600.read.tidig.z(), !range !1 +; OPT: call range(i32 0, 257) i32 @llvm.r600.read.local.size.y() +; OPT: call range(i32 0, 257) i32 @llvm.r600.read.local.size.z() +; OPT: call range(i32 0, 256) i32 @llvm.r600.read.tidig.x() +; OPT: call range(i32 0, 256) i32 @llvm.r600.read.tidig.y() +; OPT: call range(i32 0, 256) i32 @llvm.r600.read.tidig.z() define amdgpu_kernel void @mova_same_clause(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #0 { entry: @@ -276,7 +276,4 @@ define amdgpu_kernel void @ptrtoint(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { ret void } -; OPT: !0 = !{i32 0, i32 257} -; OPT: !1 = !{i32 0, i32 256} - attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" } diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-strip-abi-opt-attributes.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-strip-abi-opt-attributes.ll index ada1b84..778fe90 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-strip-abi-opt-attributes.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-strip-abi-opt-attributes.ll @@ -5,9 +5,9 @@ ; CHECK-LABEL: define amdgpu_kernel void @promote_to_lds(ptr addrspace(1) %out, i32 %in) #0 { ; CHECK: call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -; CHECK: call i32 @llvm.amdgcn.workitem.id.x(), !range !2 -; CHECK: call i32 @llvm.amdgcn.workitem.id.y(), !range !2 -; CHECK: call i32 @llvm.amdgcn.workitem.id.z(), !range !2 +; CHECK: call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.x() +; CHECK: call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.y() +; CHECK: call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.z() define amdgpu_kernel void @promote_to_lds(ptr addrspace(1) %out, i32 %in) #0 { entry: %tmp = alloca [2 x i32], addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll index efc11bf..f566562 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll @@ -8,22 +8,22 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_same_derived_pointer(ptr 
addrspace(1) %out, i32 %a, i32 %b) #0 { ; CHECK-LABEL: @lds_promoted_alloca_icmp_same_derived_pointer( ; CHECK-NEXT: [[TMP1:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !invariant.load !0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[TMP5]], align 4, !range [[RNG1:![0-9]+]], !invariant.load !0 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP4]], 16 -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG2:![0-9]+]] -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.workitem.id.y(), !range [[RNG2]] -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.workitem.id.z(), !range [[RNG2]] -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP6]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP8]] -; CHECK-NEXT: [[TMP13:%.*]] = mul nuw nsw i32 [[TMP9]], [[TMP6]] -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP10]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) @lds_promoted_alloca_icmp_same_derived_pointer.alloca, i32 0, i32 [[TMP15]] -; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP16]], i32 0, i32 [[A:%.*]] -; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP16]], i32 0, i32 [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4, !invariant.load [[META0:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[TMP4]], align 4, !range [[RNG1:![0-9]+]], !invariant.load [[META0]] +; CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP3]], 16 +; CHECK-NEXT: [[TMP7:%.*]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP8:%.*]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP9:%.*]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP10:%.*]] = mul nuw nsw i32 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = mul nuw nsw i32 [[TMP8]], [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP9]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) @lds_promoted_alloca_icmp_same_derived_pointer.alloca, i32 0, i32 [[TMP14]] +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP15]], i32 0, i32 [[A:%.*]] +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP15]], i32 0, i32 [[B:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(3) [[PTR0]], [[PTR1]] ; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32 ; CHECK-NEXT: store volatile i32 [[ZEXT]], ptr addrspace(1) [[OUT:%.*]], align 4 @@ -50,21 +50,21 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_same_derived_pointer(ptr add define amdgpu_kernel void @lds_promoted_alloca_icmp_null_rhs(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { ; CHECK-LABEL: @lds_promoted_alloca_icmp_null_rhs( ; CHECK-NEXT: 
[[TMP1:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !invariant.load !0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[TMP5]], align 4, !range [[RNG1]], !invariant.load !0 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP4]], 16 -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG2]] -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.workitem.id.y(), !range [[RNG2]] -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.workitem.id.z(), !range [[RNG2]] -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP6]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP8]] -; CHECK-NEXT: [[TMP13:%.*]] = mul nuw nsw i32 [[TMP9]], [[TMP6]] -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP10]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) @lds_promoted_alloca_icmp_null_rhs.alloca, i32 0, i32 [[TMP15]] -; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP16]], i32 0, i32 [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4, !invariant.load [[META0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[TMP4]], align 4, !range [[RNG1]], !invariant.load [[META0]] +; CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP3]], 16 +; CHECK-NEXT: [[TMP7:%.*]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP8:%.*]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP9:%.*]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP10:%.*]] = mul nuw nsw i32 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = mul nuw nsw i32 [[TMP8]], [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP9]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) @lds_promoted_alloca_icmp_null_rhs.alloca, i32 0, i32 [[TMP14]] +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP15]], i32 0, i32 [[A:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(3) [[PTR0]], null ; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32 ; CHECK-NEXT: store volatile i32 [[ZEXT]], ptr addrspace(1) [[OUT:%.*]], align 4 @@ -89,21 +89,21 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_null_rhs(ptr addrspace(1) %o define amdgpu_kernel void @lds_promoted_alloca_icmp_null_lhs(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { ; CHECK-LABEL: @lds_promoted_alloca_icmp_null_lhs( ; CHECK-NEXT: [[TMP1:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !invariant.load !0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2 -; CHECK-NEXT: [[TMP6:%.*]] = load 
i32, ptr addrspace(4) [[TMP5]], align 4, !range [[RNG1]], !invariant.load !0 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP4]], 16 -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG2]] -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.workitem.id.y(), !range [[RNG2]] -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.workitem.id.z(), !range [[RNG2]] -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP6]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP8]] -; CHECK-NEXT: [[TMP13:%.*]] = mul nuw nsw i32 [[TMP9]], [[TMP6]] -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP10]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) @lds_promoted_alloca_icmp_null_lhs.alloca, i32 0, i32 [[TMP15]] -; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP16]], i32 0, i32 [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4, !invariant.load [[META0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[TMP4]], align 4, !range [[RNG1]], !invariant.load [[META0]] +; CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP3]], 16 +; CHECK-NEXT: [[TMP7:%.*]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP8:%.*]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP9:%.*]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP10:%.*]] = mul nuw nsw i32 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = mul nuw nsw i32 [[TMP8]], [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP9]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) @lds_promoted_alloca_icmp_null_lhs.alloca, i32 0, i32 [[TMP14]] +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP15]], i32 0, i32 [[A:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(3) null, [[PTR0]] ; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32 ; CHECK-NEXT: store volatile i32 [[ZEXT]], ptr addrspace(1) [[OUT:%.*]], align 4 -- cgit v1.1 From 2388129d48cadcfca735a6d04b49b9ddb9c27de1 Mon Sep 17 00:00:00 2001 From: Koakuma Date: Sun, 9 Jun 2024 22:09:14 +0700 Subject: [SPARC][IAS] Add named prefetch tag constants This adds named tag constants (such as `#one_write` and `#one_read`) for the prefetch instruction. 
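For illustration, the same prefetch can now be written either with the raw
function-code immediate or with the corresponding symbolic tag; both forms
assemble to the identical encoding ([0xc3,0x6e,0x6f,0x80]), as the test
updates below show:

    prefetch [ %i1 + 0xf80 ], 1          ! numeric prefetch function code
    prefetch [ %i1 + 0xf80 ], #one_read  ! equivalent named tag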
Reviewers: jrtc27, rorth, brad0, s-barannikov Reviewed By: s-barannikov Pull Request: https://github.com/llvm/llvm-project/pull/94249 --- llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 67 ++++++++++++- .../Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp | 11 +++ .../Target/Sparc/MCTargetDesc/SparcInstPrinter.h | 2 + .../Sparc/MCTargetDesc/SparcMCTargetDesc.cpp | 5 + .../Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h | 11 +++ llvm/lib/Target/Sparc/Sparc.td | 2 + llvm/lib/Target/Sparc/SparcASITags.td | 2 - llvm/lib/Target/Sparc/SparcInstrInfo.td | 14 ++- llvm/lib/Target/Sparc/SparcPrefetchTags.td | 41 ++++++++ llvm/test/MC/Disassembler/Sparc/sparc-v9.txt | 62 +++++++++++- llvm/test/MC/Sparc/sparcv9-instructions.s | 108 ++++++++++++++++++++- 11 files changed, 312 insertions(+), 13 deletions(-) create mode 100644 llvm/lib/Target/Sparc/SparcPrefetchTags.td diff --git a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index e4f5c64..f0a3a4e 100644 --- a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -91,6 +91,8 @@ class SparcAsmParser : public MCTargetAsmParser { ParseStatus parseASITag(OperandVector &Operands); + ParseStatus parsePrefetchTag(OperandVector &Operands); + template ParseStatus parseTailRelocSym(OperandVector &Operands); @@ -209,7 +211,8 @@ private: k_Immediate, k_MemoryReg, k_MemoryImm, - k_ASITag + k_ASITag, + k_PrefetchTag, } Kind; SMLoc StartLoc, EndLoc; @@ -240,6 +243,7 @@ private: struct ImmOp Imm; struct MemOp Mem; unsigned ASI; + unsigned Prefetch; }; public: @@ -253,6 +257,7 @@ public: bool isMEMri() const { return Kind == k_MemoryImm; } bool isMembarTag() const { return Kind == k_Immediate; } bool isASITag() const { return Kind == k_ASITag; } + bool isPrefetchTag() const { return Kind == k_PrefetchTag; } bool isTailRelocSym() const { return Kind == k_Immediate; } bool isCallTarget() const { @@ -337,6 +342,11 @@ public: return ASI; } + unsigned getPrefetchTag() const { + assert((Kind == k_PrefetchTag) && "Invalid access!"); + return Prefetch; + } + /// getStartLoc - Get the location of the first token of this operand. 
SMLoc getStartLoc() const override { return StartLoc; @@ -360,6 +370,9 @@ public: case k_ASITag: OS << "ASI tag: " << getASITag() << "\n"; break; + case k_PrefetchTag: + OS << "Prefetch tag: " << getPrefetchTag() << "\n"; + break; } } @@ -416,6 +429,11 @@ public: Inst.addOperand(MCOperand::createImm(getASITag())); } + void addPrefetchTagOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createImm(getPrefetchTag())); + } + void addMembarTagOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCExpr *Expr = getImm(); @@ -469,6 +487,15 @@ public: return Op; } + static std::unique_ptr CreatePrefetchTag(unsigned Val, SMLoc S, + SMLoc E) { + auto Op = std::make_unique(k_PrefetchTag); + Op->Prefetch = Val; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + static bool MorphToIntPairReg(SparcOperand &Op) { unsigned Reg = Op.getReg(); assert(Op.Reg.Kind == rk_IntReg); @@ -1088,6 +1115,44 @@ ParseStatus SparcAsmParser::parseASITag(OperandVector &Operands) { return ParseStatus::Success; } +ParseStatus SparcAsmParser::parsePrefetchTag(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + SMLoc E = Parser.getTok().getEndLoc(); + int64_t PrefetchVal = 0; + + switch (getLexer().getKind()) { + case AsmToken::LParen: + case AsmToken::Integer: + case AsmToken::Identifier: + case AsmToken::Plus: + case AsmToken::Minus: + case AsmToken::Tilde: + if (getParser().parseAbsoluteExpression(PrefetchVal) || + !isUInt<5>(PrefetchVal)) + return Error(S, "invalid prefetch number, must be between 0 and 31"); + break; + case AsmToken::Hash: { + SMLoc TagStart = getLexer().peekTok(false).getLoc(); + Parser.Lex(); // Eat the '#'. + const StringRef PrefetchName = Parser.getTok().getString(); + const SparcPrefetchTag::PrefetchTag *PrefetchTag = + SparcPrefetchTag::lookupPrefetchTagByName(PrefetchName); + Parser.Lex(); // Eat the identifier token. 
+ + if (!PrefetchTag) + return Error(TagStart, "unknown prefetch tag"); + + PrefetchVal = PrefetchTag->Encoding; + break; + } + default: + return ParseStatus::NoMatch; + } + + Operands.push_back(SparcOperand::CreatePrefetchTag(PrefetchVal, S, E)); + return ParseStatus::Success; +} + ParseStatus SparcAsmParser::parseCallTarget(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1); diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp index ef77648..5b407a8 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp @@ -253,3 +253,14 @@ void SparcInstPrinter::printASITag(const MCInst *MI, int opNum, else O << Imm; } + +void SparcInstPrinter::printPrefetchTag(const MCInst *MI, int opNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + unsigned Imm = MI->getOperand(opNum).getImm(); + auto PrefetchTag = SparcPrefetchTag::lookupPrefetchTagByEncoding(Imm); + if (PrefetchTag) + O << '#' << PrefetchTag->Name; + else + O << Imm; +} diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h b/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h index cb691a3..207a970 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h @@ -56,6 +56,8 @@ public: raw_ostream &O); void printASITag(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, raw_ostream &O); + void printPrefetchTag(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, + raw_ostream &O); }; } // end namespace llvm diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index fb634cc..ad6ca09 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -26,6 +26,11 @@ namespace SparcASITag { #define GET_ASITagsList_IMPL #include "SparcGenSearchableTables.inc" } // end namespace SparcASITag + +namespace SparcPrefetchTag { +#define GET_PrefetchTagsList_IMPL +#include "SparcGenSearchableTables.inc" +} // end namespace SparcPrefetchTag } // end namespace llvm using namespace llvm; diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h index fd76627..a2a9f74 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h @@ -48,6 +48,17 @@ struct ASITag { #define GET_ASITagsList_DECL #include "SparcGenSearchableTables.inc" } // end namespace SparcASITag + +// Defines symbolic names for Sparc v9 prefetch tag names. +namespace SparcPrefetchTag { +struct PrefetchTag { + const char *Name; + unsigned Encoding; +}; + +#define GET_PrefetchTagsList_DECL +#include "SparcGenSearchableTables.inc" +} // end namespace SparcPrefetchTag } // End llvm namespace // Defines symbolic names for Sparc registers. This defines a mapping from diff --git a/llvm/lib/Target/Sparc/Sparc.td b/llvm/lib/Target/Sparc/Sparc.td index 45cf985..65f372f 100644 --- a/llvm/lib/Target/Sparc/Sparc.td +++ b/llvm/lib/Target/Sparc/Sparc.td @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// include "llvm/Target/Target.td" +include "llvm/TableGen/SearchableTable.td" //===----------------------------------------------------------------------===// // SPARC Subtarget features. 
@@ -91,6 +92,7 @@ foreach i = 0 ... 5 in
 //===----------------------------------------------------------------------===//
 
 include "SparcASITags.td"
+include "SparcPrefetchTags.td"
 include "SparcRegisterInfo.td"
 include "SparcCallingConv.td"
 include "SparcSchedule.td"
diff --git a/llvm/lib/Target/Sparc/SparcASITags.td b/llvm/lib/Target/Sparc/SparcASITags.td
index 115e41b..4b2d17b 100644
--- a/llvm/lib/Target/Sparc/SparcASITags.td
+++ b/llvm/lib/Target/Sparc/SparcASITags.td
@@ -11,8 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-include "llvm/TableGen/SearchableTable.td"
-
 class ASITag<string name, string alt, bits<8> op> {
   string Name = name;
   // A maximum of one alias is supported right now.
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 4d68f93..f1778f2 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -197,6 +197,16 @@ def ASITag : Operand<i32> {
   let ParserMatchClass = SparcASITagAsmOperand;
 }
 
+def SparcPrefetchTagAsmOperand : AsmOperandClass {
+  let Name = "PrefetchTag";
+  let ParserMethod = "parsePrefetchTag";
+}
+
+def PrefetchTag : Operand<i32> {
+  let PrintMethod = "printPrefetchTag";
+  let ParserMatchClass = SparcPrefetchTagAsmOperand;
+}
+
 // Branch targets have OtherVT type.
 def brtarget : Operand<OtherVT> {
   let EncoderMethod = "getBranchTargetOpValue";
@@ -1767,10 +1777,10 @@ let Predicates = [HasV9], rs1 = 0, rs2 = 0 in {
 // Section A.42 - Prefetch Data
 let Predicates = [HasV9] in {
   def PREFETCHr : F3_1<3, 0b101101,
-                       (outs), (ins (MEMrr $rs1, $rs2):$addr, shift_imm5:$rd),
+                       (outs), (ins (MEMrr $rs1, $rs2):$addr, PrefetchTag:$rd),
                        "prefetch [$addr], $rd", []>;
   def PREFETCHi : F3_2<3, 0b101101,
-                       (outs), (ins (MEMri $rs1, $simm13):$addr, shift_imm5:$rd),
+                       (outs), (ins (MEMri $rs1, $simm13):$addr, PrefetchTag:$rd),
                        "prefetch [$addr], $rd", []>;
 }
 
diff --git a/llvm/lib/Target/Sparc/SparcPrefetchTags.td b/llvm/lib/Target/Sparc/SparcPrefetchTags.td
new file mode 100644
index 0000000..0104f47
--- /dev/null
+++ b/llvm/lib/Target/Sparc/SparcPrefetchTags.td
@@ -0,0 +1,41 @@
+//===- SparcPrefetchTags.td --------------------------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the symbolic operands permitted for various kinds of
+// SPARCv9 prefetches.
+//
+//===----------------------------------------------------------------------===//
+
+class PrefetchTag<string name, bits<8> op> {
+  string Name = name;
+  bits<8> Encoding = op;
+}
+
+def PrefetchTagsList : GenericTable {
+  let FilterClass = "PrefetchTag";
+  let Fields = ["Name", "Encoding"];
+
+  let PrimaryKey = [ "Encoding" ];
+  let PrimaryKeyName = "lookupPrefetchTagByEncoding";
+}
+
+def lookupPrefetchTagByName : SearchIndex {
+  let Table = PrefetchTagsList;
+  let Key = [ "Name" ];
+}
+
+def : PrefetchTag<"n_reads", 0x0>;
+def : PrefetchTag<"one_read", 0x1>;
+def : PrefetchTag<"n_writes", 0x2>;
+def : PrefetchTag<"one_write", 0x3>;
+def : PrefetchTag<"page", 0x4>;
+def : PrefetchTag<"unified", 0x11>;
+def : PrefetchTag<"n_reads_strong", 0x14>;
+def : PrefetchTag<"one_read_strong", 0x15>;
+def : PrefetchTag<"n_writes_strong", 0x16>;
+def : PrefetchTag<"one_write_strong", 0x17>;
diff --git a/llvm/test/MC/Disassembler/Sparc/sparc-v9.txt b/llvm/test/MC/Disassembler/Sparc/sparc-v9.txt
index da278e1..d561216 100644
--- a/llvm/test/MC/Disassembler/Sparc/sparc-v9.txt
+++ b/llvm/test/MC/Disassembler/Sparc/sparc-v9.txt
@@ -132,11 +132,65 @@
 # CHECK: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore | #Lookaside | #MemIssue | #Sync
 0x81 0x43 0xe0 0x7f
 
-# CHECK: prefetch [%i1+3968], 1
-0xc3,0x6e,0x6f,0x80
+# CHECK: prefetch [%i1+3968], #n_reads
+0xc1 0x6e 0x6f 0x80
 
-# CHECK: prefetch [%i1+%i2], 1
-0xc3,0x6e,0x40,0x1a
+# CHECK: prefetch [%i1+3968], #one_read
+0xc3 0x6e 0x6f 0x80
+
+# CHECK: prefetch [%i1+3968], #n_writes
+0xc5 0x6e 0x6f 0x80
+
+# CHECK: prefetch [%i1+3968], #one_write
+0xc7 0x6e 0x6f 0x80
+
+# CHECK: prefetch [%i1+3968], #page
+0xc9 0x6e 0x6f 0x80
+
+# CHECK: prefetch [%i1+3968], #unified
+0xe3 0x6e 0x6f 0x80
+
+# CHECK: prefetch [%i1+3968], #n_reads_strong
+0xe9 0x6e 0x6f 0x80
+
+# CHECK: prefetch [%i1+3968], #one_read_strong
+0xeb 0x6e 0x6f 0x80
+
+# CHECK: prefetch [%i1+3968], #n_writes_strong
+0xed 0x6e 0x6f 0x80
+
+# CHECK: prefetch [%i1+3968], #one_write_strong
+0xef 0x6e 0x6f 0x80
+
+# CHECK: prefetch [%i1+%i2], #n_reads
+0xc1 0x6e 0x40 0x1a
+
+# CHECK: prefetch [%i1+%i2], #one_read
+0xc3 0x6e 0x40 0x1a
+
+# CHECK: prefetch [%i1+%i2], #n_writes
+0xc5 0x6e 0x40 0x1a
+
+# CHECK: prefetch [%i1+%i2], #one_write
+0xc7 0x6e 0x40 0x1a
+
+# CHECK: prefetch [%i1+%i2], #page
+0xc9 0x6e 0x40 0x1a
+
+# CHECK: prefetch [%i1+%i2], #unified
+0xe3 0x6e 0x40 0x1a
+
+# CHECK: prefetch [%i1+%i2], #n_reads_strong
+0xe9 0x6e 0x40 0x1a
+
+# CHECK: prefetch [%i1+%i2], #one_read_strong
+0xeb 0x6e 0x40 0x1a
+
+# CHECK: prefetch [%i1+%i2], #n_writes_strong
+0xed 0x6e 0x40 0x1a
+
+# CHECK: prefetch [%i1+%i2], #one_write_strong
+0xef 0x6e 0x40 0x1a
 
 # CHECK: done
 0x81,0xf0,0x00,0x00
diff --git a/llvm/test/MC/Sparc/sparcv9-instructions.s b/llvm/test/MC/Sparc/sparcv9-instructions.s
index d461c82..80f67ac 100644
--- a/llvm/test/MC/Sparc/sparcv9-instructions.s
+++ b/llvm/test/MC/Sparc/sparcv9-instructions.s
@@ -537,16 +537,116 @@
         ! V9: stxa %g0, [%g2+%i5] #ASI_SNF    ! encoding: [0xc0,0xf0,0x90,0x7d]
         stxa %g0, [%g2 + %i5] #ASI_SNF
 
-        ! V8: error: instruction requires a CPU feature not currently enabled
+        ! V8: error: invalid operand for instruction
         ! V8-NEXT: prefetch [ %i1 + 0xf80 ], 1
-        ! V9: prefetch [%i1+3968], 1 ! encoding: [0xc3,0x6e,0x6f,0x80]
+        ! V9: prefetch [%i1+3968], #one_read ! encoding: [0xc3,0x6e,0x6f,0x80]
         prefetch [ %i1 + 0xf80 ], 1
 
-        ! V8: error: instruction requires a CPU feature not currently enabled
+        ! V8: error: unexpected token
+        ! V8-NEXT: prefetch [ %i1 + 0xf80 ], #n_reads
+        !
V9: prefetch [%i1+3968], #n_reads ! encoding: [0xc1,0x6e,0x6f,0x80] + prefetch [ %i1 + 0xf80 ], #n_reads + + ! V8: error: unexpected token + ! V8-NEXT: prefetch [ %i1 + 0xf80 ], #one_read + ! V9: prefetch [%i1+3968], #one_read ! encoding: [0xc3,0x6e,0x6f,0x80] + prefetch [ %i1 + 0xf80 ], #one_read + + ! V8: error: unexpected token + ! V8-NEXT: prefetch [ %i1 + 0xf80 ], #n_writes + ! V9: prefetch [%i1+3968], #n_writes ! encoding: [0xc5,0x6e,0x6f,0x80] + prefetch [ %i1 + 0xf80 ], #n_writes + + ! V8: error: unexpected token + ! V8-NEXT: prefetch [ %i1 + 0xf80 ], #one_write + ! V9: prefetch [%i1+3968], #one_write ! encoding: [0xc7,0x6e,0x6f,0x80] + prefetch [ %i1 + 0xf80 ], #one_write + + ! V8: error: unexpected token + ! V8-NEXT: prefetch [ %i1 + 0xf80 ], #page + ! V9: prefetch [%i1+3968], #page ! encoding: [0xc9,0x6e,0x6f,0x80] + prefetch [ %i1 + 0xf80 ], #page + + ! V8: error: unexpected token + ! V8-NEXT: prefetch [ %i1 + 0xf80 ], #unified + ! V9: prefetch [%i1+3968], #unified ! encoding: [0xe3,0x6e,0x6f,0x80] + prefetch [ %i1 + 0xf80 ], #unified + + ! V8: error: unexpected token + ! V8-NEXT: prefetch [ %i1 + 0xf80 ], #n_reads_strong + ! V9: prefetch [%i1+3968], #n_reads_strong ! encoding: [0xe9,0x6e,0x6f,0x80] + prefetch [ %i1 + 0xf80 ], #n_reads_strong + + ! V8: error: unexpected token + ! V8-NEXT: prefetch [ %i1 + 0xf80 ], #one_read_strong + ! V9: prefetch [%i1+3968], #one_read_strong ! encoding: [0xeb,0x6e,0x6f,0x80] + prefetch [ %i1 + 0xf80 ], #one_read_strong + + ! V8: error: unexpected token + ! V8-NEXT: prefetch [ %i1 + 0xf80 ], #n_writes_strong + ! V9: prefetch [%i1+3968], #n_writes_strong ! encoding: [0xed,0x6e,0x6f,0x80] + prefetch [ %i1 + 0xf80 ], #n_writes_strong + + ! V8: error: unexpected token + ! V8-NEXT: prefetch [ %i1 + 0xf80 ], #one_write_strong + ! V9: prefetch [%i1+3968], #one_write_strong ! encoding: [0xef,0x6e,0x6f,0x80] + prefetch [ %i1 + 0xf80 ], #one_write_strong + + ! V8: error: invalid operand for instruction ! V8-NEXT: prefetch [ %i1 + %i2 ], 1 - ! V9: prefetch [%i1+%i2], 1 ! encoding: [0xc3,0x6e,0x40,0x1a] + ! V9: prefetch [%i1+%i2], #one_read ! encoding: [0xc3,0x6e,0x40,0x1a] prefetch [ %i1 + %i2 ], 1 + ! V8: error: unexpected token + ! V8-NEXT: prefetch [ %i1 + %i2 ], #n_reads + ! V9: prefetch [%i1+%i2], #n_reads ! encoding: [0xc1,0x6e,0x40,0x1a] + prefetch [ %i1 + %i2 ], #n_reads + + ! V8: error: unexpected token + ! V8-NEXT: prefetch [ %i1 + %i2 ], #one_read + ! V9: prefetch [%i1+%i2], #one_read ! encoding: [0xc3,0x6e,0x40,0x1a] + prefetch [ %i1 + %i2 ], #one_read + + ! V8: error: unexpected token + ! V8-NEXT: prefetch [ %i1 + %i2 ], #n_writes + ! V9: prefetch [%i1+%i2], #n_writes ! encoding: [0xc5,0x6e,0x40,0x1a] + prefetch [ %i1 + %i2 ], #n_writes + + ! V8: error: unexpected token + ! V8-NEXT: prefetch [ %i1 + %i2 ], #one_write + ! V9: prefetch [%i1+%i2], #one_write ! encoding: [0xc7,0x6e,0x40,0x1a] + prefetch [ %i1 + %i2 ], #one_write + + ! V8: error: unexpected token + ! V8-NEXT: prefetch [ %i1 + %i2 ], #page + ! V9: prefetch [%i1+%i2], #page ! encoding: [0xc9,0x6e,0x40,0x1a] + prefetch [ %i1 + %i2 ], #page + + ! V8: error: unexpected token + ! V8-NEXT: prefetch [ %i1 + %i2 ], #unified + ! V9: prefetch [%i1+%i2], #unified ! encoding: [0xe3,0x6e,0x40,0x1a] + prefetch [ %i1 + %i2 ], #unified + + ! V8: error: unexpected token + ! V8-NEXT: prefetch [ %i1 + %i2 ], #n_reads_strong + ! V9: prefetch [%i1+%i2], #n_reads_strong ! encoding: [0xe9,0x6e,0x40,0x1a] + prefetch [ %i1 + %i2 ], #n_reads_strong + + ! V8: error: unexpected token + ! 
V8-NEXT: prefetch [ %i1 + %i2 ], #one_read_strong + ! V9: prefetch [%i1+%i2], #one_read_strong ! encoding: [0xeb,0x6e,0x40,0x1a] + prefetch [ %i1 + %i2 ], #one_read_strong + + ! V8: error: unexpected token + ! V8-NEXT: prefetch [ %i1 + %i2 ], #n_writes_strong + ! V9: prefetch [%i1+%i2], #n_writes_strong ! encoding: [0xed,0x6e,0x40,0x1a] + prefetch [ %i1 + %i2 ], #n_writes_strong + + ! V8: error: unexpected token + ! V8-NEXT: prefetch [ %i1 + %i2 ], #one_write_strong + ! V9: prefetch [%i1+%i2], #one_write_strong ! encoding: [0xef,0x6e,0x40,0x1a] + prefetch [ %i1 + %i2 ], #one_write_strong + ! V8: error: instruction requires a CPU feature not currently enabled ! V8-NEXT: done ! V9: done ! encoding: [0x81,0xf0,0x00,0x00] -- cgit v1.1 From 41f2ea0b0fcd2b683c7380e1cfd3acad2feb51dd Mon Sep 17 00:00:00 2001 From: Koakuma Date: Sun, 9 Jun 2024 22:13:31 +0700 Subject: [SPARC][IAS] Add support for `prefetcha` instruction This adds support for `prefetcha` instruction for prefetching from alternate address spaces. Reviewers: jrtc27, brad0, rorth, s-barannikov Reviewed By: s-barannikov Pull Request: https://github.com/llvm/llvm-project/pull/94250 --- llvm/lib/Target/Sparc/SparcInstrInfo.td | 7 ++++++ llvm/test/MC/Disassembler/Sparc/sparc-v9-asi.txt | 5 ++++ llvm/test/MC/Sparc/sparcv9-instructions.s | 30 ++++++++++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td index f1778f2..cac96a1 100644 --- a/llvm/lib/Target/Sparc/SparcInstrInfo.td +++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td @@ -1782,6 +1782,13 @@ let Predicates = [HasV9] in { def PREFETCHi : F3_2<3, 0b101101, (outs), (ins (MEMri $rs1, $simm13):$addr, PrefetchTag:$rd), "prefetch [$addr], $rd", []>; + def PREFETCHAr : F3_1_asi<3, 0b111101, (outs), + (ins (MEMrr $rs1, $rs2):$addr, ASITag:$asi, PrefetchTag:$rd), + "prefetcha [$addr] $asi, $rd", []>; + let Uses = [ASR3] in + def PREFETCHAi : F3_2<3, 0b111101, (outs), + (ins (MEMri $rs1, $simm13):$addr, PrefetchTag:$rd), + "prefetcha [$addr] %asi, $rd", []>; } diff --git a/llvm/test/MC/Disassembler/Sparc/sparc-v9-asi.txt b/llvm/test/MC/Disassembler/Sparc/sparc-v9-asi.txt index 9286814..1d1fdb6 100644 --- a/llvm/test/MC/Disassembler/Sparc/sparc-v9-asi.txt +++ b/llvm/test/MC/Disassembler/Sparc/sparc-v9-asi.txt @@ -32,3 +32,8 @@ 0xd5 0xf6 0x11 0x56 # V9: casxa [%i0] #ASI_SNF_L, %l6, %o2 0xd5 0xf6 0x11 0x76 + +# V9: prefetcha [%i1+3968] %asi, #one_read +0xc3 0xee 0x6f 0x80 +# V9: prefetcha [%i1+%i2] #ASI_SNF, #one_read +0xc3 0xee 0x50 0x7a diff --git a/llvm/test/MC/Sparc/sparcv9-instructions.s b/llvm/test/MC/Sparc/sparcv9-instructions.s index 80f67ac..1b11171 100644 --- a/llvm/test/MC/Sparc/sparcv9-instructions.s +++ b/llvm/test/MC/Sparc/sparcv9-instructions.s @@ -647,6 +647,36 @@ ! V9: prefetch [%i1+%i2], #one_write_strong ! encoding: [0xef,0x6e,0x40,0x1a] prefetch [ %i1 + %i2 ], #one_write_strong + ! V8: error: malformed ASI tag, must be a constant integer expression + ! V8-NEXT: prefetcha [ %i1 + 0xf80 ] %asi, 1 + ! V9: prefetcha [%i1+3968] %asi, #one_read ! encoding: [0xc3,0xee,0x6f,0x80] + prefetcha [ %i1 + 0xf80 ] %asi, 1 + + ! V8: error: malformed ASI tag, must be a constant integer expression + ! V8-NEXT: prefetcha [ %i1 + 0xf80 ] %asi, #one_read + ! V9: prefetcha [%i1+3968] %asi, #one_read ! encoding: [0xc3,0xee,0x6f,0x80] + prefetcha [ %i1 + 0xf80 ] %asi, #one_read + + ! V8: error: malformed ASI tag, must be a constant integer expression + ! V8-NEXT: prefetcha [ %i1 + %i2 ] #ASI_SNF, 1 + ! 
V9: prefetcha [%i1+%i2] #ASI_SNF, #one_read ! encoding: [0xc3,0xee,0x50,0x7a] + prefetcha [ %i1 + %i2 ] #ASI_SNF, 1 + + ! V8: error: malformed ASI tag, must be a constant integer expression + ! V8-NEXT: prefetcha [ %i1 + %i2 ] #ASI_SNF, #one_read + ! V9: prefetcha [%i1+%i2] #ASI_SNF, #one_read ! encoding: [0xc3,0xee,0x50,0x7a] + prefetcha [ %i1 + %i2 ] #ASI_SNF, #one_read + + ! V8: error: invalid operand for instruction + ! V8-NEXT: prefetcha [ %i1 + %i2 ] 131, 1 + ! V9: prefetcha [%i1+%i2] #ASI_SNF, #one_read ! encoding: [0xc3,0xee,0x50,0x7a] + prefetcha [ %i1 + %i2 ] 131, 1 + + ! V8: error: unexpected token + ! V8-NEXT: prefetcha [ %i1 + %i2 ] 131, #one_read + ! V9: prefetcha [%i1+%i2] #ASI_SNF, #one_read ! encoding: [0xc3,0xee,0x50,0x7a] + prefetcha [ %i1 + %i2 ] 131, #one_read + ! V8: error: instruction requires a CPU feature not currently enabled ! V8-NEXT: done ! V9: done ! encoding: [0x81,0xf0,0x00,0x00] -- cgit v1.1 From 8901f718ea16ceb82b6f878db53d3bcb46b4d2b2 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 9 Jun 2024 08:14:51 -0700 Subject: Use StringRef::starts_with (NFC) (#94886) --- bolt/lib/Profile/BoltAddressTranslation.cpp | 2 +- clang-tools-extra/clang-query/QueryParser.cpp | 6 ++---- mlir/lib/Query/QueryParser.cpp | 6 ++---- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp index cdfca2b..519f282 100644 --- a/bolt/lib/Profile/BoltAddressTranslation.cpp +++ b/bolt/lib/Profile/BoltAddressTranslation.cpp @@ -304,7 +304,7 @@ std::error_code BoltAddressTranslation::parse(raw_ostream &OS, StringRef Buf) { StringRef Name = Buf.slice(Offset, Offset + NameSz); Offset = alignTo(Offset + NameSz, 4); - if (Name.substr(0, 4) != "BOLT") + if (!Name.starts_with("BOLT")) return make_error_code(llvm::errc::io_error); Error Err(Error::success()); diff --git a/clang-tools-extra/clang-query/QueryParser.cpp b/clang-tools-extra/clang-query/QueryParser.cpp index 1d0b7d9..97cb264 100644 --- a/clang-tools-extra/clang-query/QueryParser.cpp +++ b/clang-tools-extra/clang-query/QueryParser.cpp @@ -144,13 +144,11 @@ QueryRef QueryParser::endQuery(QueryRef Q) { StringRef Extra = Line; StringRef ExtraTrimmed = Extra.ltrim(" \t\v\f\r"); - if ((!ExtraTrimmed.empty() && ExtraTrimmed[0] == '\n') || - (ExtraTrimmed.size() >= 2 && ExtraTrimmed[0] == '\r' && - ExtraTrimmed[1] == '\n')) + if (ExtraTrimmed.starts_with('\n') || ExtraTrimmed.starts_with("\r\n")) Q->RemainingContent = Extra; else { StringRef TrailingWord = lexWord(); - if (!TrailingWord.empty() && TrailingWord.front() == '#') { + if (TrailingWord.starts_with('#')) { Line = Line.drop_until([](char c) { return c == '\n'; }); Line = Line.drop_while([](char c) { return c == '\n'; }); return endQuery(Q); diff --git a/mlir/lib/Query/QueryParser.cpp b/mlir/lib/Query/QueryParser.cpp index 595055a..8a03463 100644 --- a/mlir/lib/Query/QueryParser.cpp +++ b/mlir/lib/Query/QueryParser.cpp @@ -91,13 +91,11 @@ QueryRef QueryParser::endQuery(QueryRef queryRef) { llvm::StringRef extra = line; llvm::StringRef extraTrimmed = extra.ltrim(" \t\v\f\r"); - if ((!extraTrimmed.empty() && extraTrimmed[0] == '\n') || - (extraTrimmed.size() >= 2 && extraTrimmed[0] == '\r' && - extraTrimmed[1] == '\n')) + if (extraTrimmed.starts_with('\n') || extraTrimmed.starts_with("\r\n")) queryRef->remainingContent = extra; else { llvm::StringRef trailingWord = lexWord(); - if (!trailingWord.empty() && trailingWord.front() == '#') { + if (trailingWord.starts_with('#')) { line = 
line.drop_until([](char c) { return c == '\n'; });
       line = line.drop_while([](char c) { return c == '\n'; });
       return endQuery(queryRef);
-- 
cgit v1.1


From 2bc36afcdb282618d3dc08462375fffca32cd7ed Mon Sep 17 00:00:00 2001
From: Koakuma
Date: Sun, 9 Jun 2024 22:15:15 +0700
Subject: [SPARC][IAS] Handle the case of non-4-byte aligned writeNopData

If the Count passed into writeNopData is not a multiple of four, pad with
zero bytes up to the next 4-byte boundary before writing the NOP stream.
This matches the behavior of GNU binutils.

Reviewers: brad0, rorth, s-barannikov, jrtc27

Reviewed By: s-barannikov

Pull Request: https://github.com/llvm/llvm-project/pull/94251
---
 llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 240f539..cb7414f 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -323,9 +323,11 @@ namespace {
 
   bool writeNopData(raw_ostream &OS, uint64_t Count,
                     const MCSubtargetInfo *STI) const override {
-    // Cannot emit NOP with size not multiple of 32 bits.
-    if (Count % 4 != 0)
-      return false;
+
+    // If the count is not 4-byte aligned, we must be writing data into the
+    // text section (otherwise we have unaligned instructions, and thus have
+    // far bigger problems), so just write zeros instead.
+    OS.write_zeros(Count % 4);
 
     uint64_t NumNops = Count / 4;
     for (uint64_t i = 0; i != NumNops; ++i)
-- 
cgit v1.1


From e0b9cce4c72508eebd489aa46fa8f613167dbcd9 Mon Sep 17 00:00:00 2001
From: Koakuma
Date: Sun, 9 Jun 2024 22:16:34 +0700
Subject: [SPARC][IAS] Add movr(n)e alias for movr(n)z

This adds movre and movrne as alternate mnemonics for movrz and movrnz.

Reviewers: s-barannikov, jrtc27, brad0, rorth

Reviewed By: s-barannikov

Pull Request: https://github.com/llvm/llvm-project/pull/94252
---
 llvm/lib/Target/Sparc/SparcInstrAliases.td     |  2 ++
 llvm/test/MC/Sparc/sparc64-ctrl-instructions.s |  8 ++++++++
 2 files changed, 10 insertions(+)

diff --git a/llvm/lib/Target/Sparc/SparcInstrAliases.td b/llvm/lib/Target/Sparc/SparcInstrAliases.td
index 2b92445..eedad25 100644
--- a/llvm/lib/Target/Sparc/SparcInstrAliases.td
+++ b/llvm/lib/Target/Sparc/SparcInstrAliases.td
@@ -400,9 +400,11 @@ defm : cp_cond_alias<"012", 0b1111>;
 let EmitPriority = 0 in defm : cp_cond_alias<"", 0b1000>; // same as a; gnu asm, not in manual
 
 defm : reg_cond_alias<"z",   0b001>;
+defm : reg_cond_alias<"e",   0b001>;
 defm : reg_cond_alias<"lez", 0b010>;
 defm : reg_cond_alias<"lz",  0b011>;
 defm : reg_cond_alias<"nz",  0b101>;
+defm : reg_cond_alias<"ne",  0b101>;
 defm : reg_cond_alias<"gz",  0b110>;
 defm : reg_cond_alias<"gez", 0b111>;
 
diff --git a/llvm/test/MC/Sparc/sparc64-ctrl-instructions.s b/llvm/test/MC/Sparc/sparc64-ctrl-instructions.s
index a21b175..1889473 100644
--- a/llvm/test/MC/Sparc/sparc64-ctrl-instructions.s
+++ b/llvm/test/MC/Sparc/sparc64-ctrl-instructions.s
@@ -1191,28 +1191,36 @@
         brz,a,pn %g1, .BB0
 
         ! CHECK: movrz %g1, %g2, %g3 ! encoding: [0x87,0x78,0x44,0x02]
+        ! CHECK: movrz %g1, %g2, %g3 ! encoding: [0x87,0x78,0x44,0x02]
         ! CHECK: movrlez %g1, %g2, %g3 ! encoding: [0x87,0x78,0x48,0x02]
         ! CHECK: movrlz %g1, %g2, %g3 ! encoding: [0x87,0x78,0x4c,0x02]
         ! CHECK: movrnz %g1, %g2, %g3 ! encoding: [0x87,0x78,0x54,0x02]
+        ! CHECK: movrnz %g1, %g2, %g3 ! encoding: [0x87,0x78,0x54,0x02]
        ! CHECK: movrgz %g1, %g2, %g3 ! encoding: [0x87,0x78,0x58,0x02]
        ! CHECK: movrgez %g1, %g2, %g3 !
encoding: [0x87,0x78,0x5c,0x02] movrz %g1, %g2, %g3 + movre %g1, %g2, %g3 movrlez %g1, %g2, %g3 movrlz %g1, %g2, %g3 movrnz %g1, %g2, %g3 + movrne %g1, %g2, %g3 movrgz %g1, %g2, %g3 movrgez %g1, %g2, %g3 ! CHECK: movrz %g1, 2, %g3 ! encoding: [0x87,0x78,0x64,0x02] + ! CHECK: movrz %g1, 2, %g3 ! encoding: [0x87,0x78,0x64,0x02] ! CHECK: movrlez %g1, 2, %g3 ! encoding: [0x87,0x78,0x68,0x02] ! CHECK: movrlz %g1, 2, %g3 ! encoding: [0x87,0x78,0x6c,0x02] ! CHECK: movrnz %g1, 2, %g3 ! encoding: [0x87,0x78,0x74,0x02] + ! CHECK: movrnz %g1, 2, %g3 ! encoding: [0x87,0x78,0x74,0x02] ! CHECK: movrgz %g1, 2, %g3 ! encoding: [0x87,0x78,0x78,0x02] ! CHECK: movrgez %g1, 2, %g3 ! encoding: [0x87,0x78,0x7c,0x02] movrz %g1, 2, %g3 + movre %g1, 2, %g3 movrlez %g1, 2, %g3 movrlz %g1, 2, %g3 movrnz %g1, 2, %g3 + movrne %g1, 2, %g3 movrgz %g1, 2, %g3 movrgez %g1, 2, %g3 -- cgit v1.1 From de736d9c6a7634285c4283b369a9442a9d191cd9 Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Sun, 9 Jun 2024 18:21:35 +0200 Subject: [libc++][TZDB] Implements time_zone get_info(local_time). (#89537) Implements parts of: - P0355 Extending chrono to Calendars and Time Zones --- libcxx/include/__chrono/time_zone.h | 8 + libcxx/include/chrono | 3 + libcxx/src/time_zone.cpp | 147 +++ .../libcxx/diagnostics/chrono.nodiscard.verify.cpp | 2 + .../time.zone.members/get_info.local_time.pass.cpp | 1302 ++++++++++++++++++++ 5 files changed, 1462 insertions(+) create mode 100644 libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.local_time.pass.cpp diff --git a/libcxx/include/__chrono/time_zone.h b/libcxx/include/__chrono/time_zone.h index 91ddab8..e28c918 100644 --- a/libcxx/include/__chrono/time_zone.h +++ b/libcxx/include/__chrono/time_zone.h @@ -16,7 +16,9 @@ // Enable the contents of the header only when libc++ was built with experimental features enabled. 
#if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +# include <__chrono/calendar.h> # include <__chrono/duration.h> +# include <__chrono/local_info.h> # include <__chrono/sys_info.h> # include <__chrono/system_clock.h> # include <__compare/strong_order.h> @@ -63,12 +65,18 @@ public: return __get_info(chrono::time_point_cast(__time)); } + template + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI local_info get_info(const local_time<_Duration>& __time) const { + return __get_info(chrono::time_point_cast(__time)); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const __impl& __implementation() const noexcept { return *__impl_; } private: [[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI string_view __name() const noexcept; [[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI sys_info __get_info(sys_seconds __time) const; + [[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI local_info __get_info(local_seconds __time) const; unique_ptr<__impl> __impl_; }; diff --git a/libcxx/include/chrono b/libcxx/include/chrono index 96a3e92..4d8398a 100644 --- a/libcxx/include/chrono +++ b/libcxx/include/chrono @@ -763,6 +763,9 @@ class time_zone { template sys_info get_info(const sys_time& st) const; + + template + local_info get_info(const local_time& tp) const; }; bool operator==(const time_zone& x, const time_zone& y) noexcept; // C++20 strong_ordering operator<=>(const time_zone& x, const time_zone& y) noexcept; // C++20 diff --git a/libcxx/src/time_zone.cpp b/libcxx/src/time_zone.cpp index 928f3d2..764a89a 100644 --- a/libcxx/src/time_zone.cpp +++ b/libcxx/src/time_zone.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include #include "include/tzdb/time_zone_private.h" @@ -903,6 +904,152 @@ time_zone::__get_info(sys_seconds __time) const { std::__throw_runtime_error("tzdb: corrupt db"); } +// Is the "__local_time" present in "__first" and "__second". If so the +// local_info has an ambiguous result. +[[nodiscard]] static bool +__is_ambiguous(local_seconds __local_time, const sys_info& __first, const sys_info& __second) { + std::chrono::local_seconds __end_first{__first.end.time_since_epoch() + __first.offset}; + std::chrono::local_seconds __begin_second{__second.begin.time_since_epoch() + __second.offset}; + + return __local_time < __end_first && __local_time >= __begin_second; +} + +// Determines the result of the "__local_time". This expects the object +// "__first" to be earlier in time than "__second". +[[nodiscard]] static local_info +__get_info(local_seconds __local_time, const sys_info& __first, const sys_info& __second) { + std::chrono::local_seconds __end_first{__first.end.time_since_epoch() + __first.offset}; + std::chrono::local_seconds __begin_second{__second.begin.time_since_epoch() + __second.offset}; + + if (__local_time < __end_first) { + if (__local_time >= __begin_second) + // |--------| + // |------| + // ^ + return {local_info::ambiguous, __first, __second}; + + // |--------| + // |------| + // ^ + return {local_info::unique, __first, sys_info{}}; + } + + if (__local_time < __begin_second) + // |--------| + // |------| + // ^ + return {local_info::nonexistent, __first, __second}; + + // |--------| + // |------| + // ^ + return {local_info::unique, __second, sys_info{}}; +} + +[[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI local_info +time_zone::__get_info(local_seconds __local_time) const { + seconds __local_seconds = __local_time.time_since_epoch(); + + /* An example of a typical year with a DST switch displayed in local time. 
+   *
+   * On the first of April the time goes forward one hour. This means the
+   * time marked with ~~ is not a valid local time. This is represented by the
+   * nonexistent value in local_info.result.
+   *
+   * On the first of November the time goes backward one hour. This means the
+   * time marked with ^^ happens twice. This is represented by the ambiguous
+   * value in local_info.result.
+   *
+   *  2020.11.01                   2021.04.01                  2021.11.01
+   *  offset +05                   offset +05                  offset +05
+   *  save 0s                      save 1h                     save 0s
+   * |------------//----------|
+   *                             |---------//--------------|
+   *                                                      |-------------
+   *                           ~~                          ^^
+   *
+   * These shifts can happen due to changes in the current time zone for a
+   * location. For example, Indian/Kerguelen switched only once: in 1950, from
+   * an offset of 0 hours to an offset of +05 hours.
+   *
+   * During all these shifts the UTC time will not have gaps.
+   */
+
+  // The code needs to determine the system time for the local time. There is
+  // no information available. Assume the offset between system time and local
+  // time is 0s. This gives an initial estimate.
+  sys_seconds __guess{__local_seconds};
+  sys_info __info = __get_info(__guess);
+
+  // At this point the offset can be used to determine an estimate for the
+  // system time. Before doing that, determine the offset and validate whether
+  // the local time is in the range [chrono::local_seconds::min(),
+  // chrono::local_seconds::max()).
+  if (__local_seconds < 0s && __info.offset > 0s)
+    if (__local_seconds - chrono::local_seconds::min().time_since_epoch() < __info.offset)
+      return {-1, __info, {}};
+
+  if (__local_seconds > 0s && __info.offset < 0s)
+    if (chrono::local_seconds::max().time_since_epoch() - __local_seconds < -__info.offset)
+      return {-2, __info, {}};
+
+  // Based on the information found in the sys_info, the local time can be
+  // converted to a system time. This resulting time can be in the following
+  // locations of the sys_info:
+  //
+  //      |---------//--------------|
+  //   1  2.1      2.2           2.3  3
+  //
+  // 1. The estimate is before the returned sys_info object.
+  //    The result is either non-existent or unique in the previous sys_info.
+  // 2. The estimate is in the sys_info object.
+  //    - If the sys_info begin is not sys_seconds::min(), then it might be at
+  //      2.1 and could be ambiguous with the previous or unique.
+  //    - If sys_info end is not sys_seconds::max(), then it might be at 2.3
+  //      and could be ambiguous with the next or unique.
+  //    - Else it is at 2.2 and always unique. This case happens when a
+  //      time zone has no transitions. For example, UTC or GMT+1.
+  // 3. The estimate is after the returned sys_info object.
+  //    The result is either non-existent or unique in the next sys_info.
+  //
+  // There is no specification of where the "middle" starts. Similar issues can
+  // happen when sys_info objects are "short"; then "unique in the next" could
+  // become "ambiguous in the next and the one following". Theoretically there
+  // is the option of the following time-line:
+  //
+  // |------------|
+  //       |----|
+  //          |-----------------|
+  //
+  // However, the local_info object only has 2 sys_info objects, so this option
+  // is not tested.
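+
+  // A worked example of the estimate-and-refine approach below (the zone and
+  // offsets are assumed purely for illustration, not actual tzdb data):
+  // suppose a zone jumps from +01 to +02 at the system time 01:00. For the
+  // local time 02:30 the initial guess is the system time 02:30, which lands
+  // in the +02 sys_info. Refining, 02:30 - 2h = 00:30 lies before that
+  // sys_info's begin (01:00), so case 1 below consults the previous sys_info,
+  // and the chrono::__get_info helper classifies local 02:30 as nonexistent.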
+
+  sys_seconds __sys_time{__local_seconds - __info.offset};
+  if (__sys_time < __info.begin)
+    // Case 1 before __info
+    return chrono::__get_info(__local_time, __get_info(__info.begin - 1s), __info);
+
+  if (__sys_time >= __info.end)
+    // Case 3 after __info
+    return chrono::__get_info(__local_time, __info, __get_info(__info.end));
+
+  // Case 2 in __info
+  if (__info.begin != sys_seconds::min()) {
+    // Case 2.1: not at the beginning. When not ambiguous, fall through and
+    // test case 2.3.
+    sys_info __prev = __get_info(__info.begin - 1s);
+    if (__is_ambiguous(__local_time, __prev, __info))
+      return {local_info::ambiguous, __prev, __info};
+  }
+
+  if (__info.end == sys_seconds::max())
+    // At the end, so it's case 2.2.
+    return {local_info::unique, __info, sys_info{}};
+
+  // This tests case 2.2 or case 2.3.
+  return chrono::__get_info(__local_time, __info, __get_info(__info.end));
+}
+
 } // namespace chrono
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/test/libcxx/diagnostics/chrono.nodiscard.verify.cpp b/libcxx/test/libcxx/diagnostics/chrono.nodiscard.verify.cpp
index a5ce5d1..fea1e44 100644
--- a/libcxx/test/libcxx/diagnostics/chrono.nodiscard.verify.cpp
+++ b/libcxx/test/libcxx/diagnostics/chrono.nodiscard.verify.cpp
@@ -48,8 +48,10 @@ void test() {
 
   {
     std::chrono::sys_seconds s{};
+    std::chrono::local_seconds l{};
     tz.name();          // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
     tz.get_info(s);     // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+    tz.get_info(l);     // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
     operator==(tz, tz); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
     operator<=>(tz, tz); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
   }
diff --git a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.local_time.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.local_time.pass.cpp
new file mode 100644
index 0000000..6dc1597
--- /dev/null
+++ b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.local_time.pass.cpp
@@ -0,0 +1,1302 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: no-filesystem, no-localization, no-tzdb
+
+// XFAIL: libcpp-has-no-experimental-tzdb
+// XFAIL: availability-tzdb-missing
+
+// <chrono>
+
+// class time_zone;
+
+// template <class _Duration>
+//   local_info get_info(const local_time<_Duration>& time) const;
+
+// This test uses the system-provided database. This makes the test portable,
+// but may cause failures when the database information changes. Historical
+// data may change if new facts are uncovered; future data may change when
+// regions change their time zone or daylight saving time. Most tests will not
+// look into the future, to attempt to avoid such issues. All tests list the
+// data on which they are based; this makes debugging failures easier, and
+// makes it possible to check whether the provided data has changed.
+//
+// The first part of the test is manually crafted; the second part compares the
+// transitions for all time zones in the database.
+
+#include
+#include
+#include
+#include
+
+#include "test_macros.h"
+#include "assert_macros.h"
+#include "concat_macros.h"
+
+// The year range to validate. The dates used in practice are expected to be
+// inside the tested range.
+constexpr std::chrono::year first{1800};
+constexpr std::chrono::year last{2100};
+
+/***** ***** HELPERS ***** *****/
+
+[[nodiscard]] static std::chrono::sys_seconds to_sys_seconds(
+    std::chrono::year year,
+    std::chrono::month month,
+    std::chrono::day day,
+    std::chrono::hours h   = std::chrono::hours(0),
+    std::chrono::minutes m = std::chrono::minutes{0},
+    std::chrono::seconds s = std::chrono::seconds{0}) {
+  std::chrono::year_month_day result{year, month, day};
+
+  return std::chrono::time_point_cast<std::chrono::seconds>(static_cast<std::chrono::sys_days>(result)) + h + m + s;
+}
+
+[[nodiscard]] static std::chrono::local_seconds to_local_seconds(
+    std::chrono::year year,
+    std::chrono::month month,
+    std::chrono::day day,
+    std::chrono::hours h   = std::chrono::hours(0),
+    std::chrono::minutes m = std::chrono::minutes{0},
+    std::chrono::seconds s = std::chrono::seconds{0}) {
+  std::chrono::year_month_day result{year, month, day};
+
+  return std::chrono::time_point_cast<std::chrono::seconds>(static_cast<std::chrono::local_days>(result)) + h + m + s;
+}
+
+static void assert_equal(const std::chrono::sys_info& lhs, const std::chrono::sys_info& rhs) {
+  TEST_REQUIRE(lhs.begin == rhs.begin,
+               TEST_WRITE_CONCATENATED("\nBegin:\nExpected output ", lhs.begin, "\nActual output ", rhs.begin, '\n'));
+  TEST_REQUIRE(lhs.end == rhs.end,
+               TEST_WRITE_CONCATENATED("\nEnd:\nExpected output ", lhs.end, "\nActual output ", rhs.end, '\n'));
+  TEST_REQUIRE(
+      lhs.offset == rhs.offset,
+      TEST_WRITE_CONCATENATED("\nOffset:\nExpected output ", lhs.offset, "\nActual output ", rhs.offset, '\n'));
+  TEST_REQUIRE(lhs.save == rhs.save,
+               TEST_WRITE_CONCATENATED("\nSave:\nExpected output ", lhs.save, "\nActual output ", rhs.save, '\n'));
+  TEST_REQUIRE(
+      lhs.abbrev == rhs.abbrev,
+      TEST_WRITE_CONCATENATED("\nAbbrev:\nExpected output ", lhs.abbrev, "\nActual output ", rhs.abbrev, '\n'));
+}
+
+static void assert_equal(const std::chrono::local_info& lhs, const std::chrono::local_info& rhs) {
+  TEST_REQUIRE(
+      lhs.result == rhs.result,
+      TEST_WRITE_CONCATENATED("\nResult:\nExpected output ", lhs.result, "\nActual output ", rhs.result, '\n'));
+
+  assert_equal(lhs.first, rhs.first);
+  assert_equal(lhs.second, rhs.second);
+}
+
+/***** ***** TESTS ***** *****/
+
+static void test_gmt() {
+  // Simple zone, always valid; no rule entries; lookup using a link.
+ // L Etc/GMT GMT + // Z Etc/GMT 0 - GMT + + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("GMT"); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info(std::chrono::sys_seconds::min(), std::chrono::sys_seconds::max(), 0s, 0min, "GMT"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(std::chrono::local_seconds::min())); +} + +static void test_local_time_out_of_range() { + // Fixed positive offset + // Etc/GMT-1 1 - +01 + + using namespace std::literals::chrono_literals; + { // lower bound + const std::chrono::time_zone* tz = std::chrono::locate_zone("Etc/GMT-1"); + + assert_equal( + std::chrono::local_info( + -1, + std::chrono::sys_info(std::chrono::sys_seconds::min(), std::chrono::sys_seconds::max(), 1h, 0min, "+01"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(std::chrono::local_seconds::min())); + + assert_equal( + std::chrono::local_info( + -1, + std::chrono::sys_info(std::chrono::sys_seconds::min(), std::chrono::sys_seconds::max(), 1h, 0min, "+01"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(std::chrono::local_seconds::min() + 59min + 59s)); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info(std::chrono::sys_seconds::min(), std::chrono::sys_seconds::max(), 1h, 0min, "+01"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(std::chrono::local_seconds::min() + 1h)); + } + + { // upper bound + const std::chrono::time_zone* tz = std::chrono::locate_zone("Etc/GMT+1"); + + assert_equal( + std::chrono::local_info( + -2, + std::chrono::sys_info(std::chrono::sys_seconds::min(), std::chrono::sys_seconds::max(), -1h, 0min, "-01"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(std::chrono::local_seconds::max() - 1s)); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info(std::chrono::sys_seconds::min(), std::chrono::sys_seconds::max(), -1h, 0min, "-01"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(std::chrono::local_seconds::max() - 1h - 1s)); + } +} + +static void test_indian_kerguelen() { + // One change, no rules, no dst changes. 
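+  // Derived from the data below: the single transition on 1950-01-01 moves
+  // the offset from 0 to +05, so the local times in [00:00:00, 05:00:00) of
+  // that day do not exist; every other local time is unique.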
+ + // Z Indian/Kerguelen 0 - -00 1950 + // 5 - +05 + + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("Indian/Kerguelen"); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + std::chrono::sys_seconds::min(), to_sys_seconds(1950y, std::chrono::January, 1d), 0s, 0min, "-00"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(std::chrono::local_seconds::min())); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::nonexistent, + std::chrono::sys_info( + std::chrono::sys_seconds::min(), to_sys_seconds(1950y, std::chrono::January, 1d), 0s, 0min, "-00"), + std::chrono::sys_info( + to_sys_seconds(1950y, std::chrono::January, 1d), std::chrono::sys_seconds::max(), 5h, 0min, "+05")), + tz->get_info(to_local_seconds(1950y, std::chrono::January, 1d))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + to_sys_seconds(1950y, std::chrono::January, 1d), std::chrono::sys_seconds::max(), 5h, 0min, "+05"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1950y, std::chrono::January, 1d, 5h))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + to_sys_seconds(1950y, std::chrono::January, 1d), std::chrono::sys_seconds::max(), 5h, 0min, "+05"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(std::chrono::local_seconds::max() - 1s)); +} + +static void test_antarctica_rothera() { + // One change, no rules, no dst changes + + // Z Antarctica/Rothera 0 - -00 1976 D + // -3 - -03 + + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("Antarctica/Rothera"); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + std::chrono::sys_seconds::min(), to_sys_seconds(1976y, std::chrono::December, 1d), 0s, 0min, "-00"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(std::chrono::local_seconds::min())); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + std::chrono::sys_seconds::min(), to_sys_seconds(1976y, std::chrono::December, 1d), 0s, 0min, "-00"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1976y, std::chrono::November, 30d, 20h, 59min, 59s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::ambiguous, + std::chrono::sys_info( + std::chrono::sys_seconds::min(), to_sys_seconds(1976y, std::chrono::December, 1d), 0s, 0min, "-00"), + std::chrono::sys_info( + to_sys_seconds(1976y, std::chrono::December, 1d), std::chrono::sys_seconds::max(), -3h, 0min, "-03")), + tz->get_info(to_local_seconds(1976y, std::chrono::November, 30d, 21h))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::ambiguous, + std::chrono::sys_info( + std::chrono::sys_seconds::min(), to_sys_seconds(1976y, std::chrono::December, 1d), 0s, 0min, "-00"), + std::chrono::sys_info( + to_sys_seconds(1976y, std::chrono::December, 1d), std::chrono::sys_seconds::max(), -3h, 0min, "-03")), + tz->get_info(to_local_seconds(1976y, std::chrono::November, 
30d, 23h, 59min, 59s)));
+
+  assert_equal(
+      std::chrono::local_info(
+          std::chrono::local_info::unique,
+          std::chrono::sys_info(
+              to_sys_seconds(1976y, std::chrono::December, 1d), std::chrono::sys_seconds::max(), -3h, 0min, "-03"),
+          std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")),
+      tz->get_info(to_local_seconds(1976y, std::chrono::December, 1d)));
+
+  assert_equal(
+      std::chrono::local_info(
+          std::chrono::local_info::unique,
+          std::chrono::sys_info(
+              to_sys_seconds(1976y, std::chrono::December, 1d), std::chrono::sys_seconds::max(), -3h, 0min, "-03"),
+          std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")),
+      tz->get_info(std::chrono::local_seconds::max() - 3h - 1s));
+
+  assert_equal(
+      std::chrono::local_info(
+          -2,
+          std::chrono::sys_info(
+              to_sys_seconds(1976y, std::chrono::December, 1d), std::chrono::sys_seconds::max(), -3h, 0min, "-03"),
+          std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")),
+      tz->get_info(std::chrono::local_seconds::max() - 1s));
+}
+
+static void test_asia_hong_kong() {
+  // A more typical entry: first some hard-coded entries and then at the
+  // end a rules-based entry. This rule is valid for its entire period.
+  //
+  // Z Asia/Hong_Kong 7:36:42 - LMT 1904 O 30 0:36:42
+  // 8 - HKT 1941 Jun 15 3
+  // 8 1 HKST 1941 O 1 4
+  // 8 0:30 HKWT 1941 D 25
+  // 9 - JST 1945 N 18 2
+  // 8 HK HK%sT
+  //
+  // R HK 1946 o - Ap 21 0 1 S
+  // R HK 1946 o - D 1 3:30s 0 -
+  // R HK 1947 o - Ap 13 3:30s 1 S
+  // R HK 1947 o - N 30 3:30s 0 -
+  // R HK 1948 o - May 2 3:30s 1 S
+  // R HK 1948 1952 - O Su>=28 3:30s 0 -
+  // R HK 1949 1953 - Ap Su>=1 3:30 1 S
+  // R HK 1953 1964 - O Su>=31 3:30 0 -
+  // R HK 1954 1964 - Mar Su>=18 3:30 1 S
+  // R HK 1965 1976 - Ap Su>=16 3:30 1 S
+  // R HK 1965 1976 - O Su>=16 3:30 0 -
+  // R HK 1973 o - D 30 3:30 1 S
+  // R HK 1979 o - May 13 3:30 1 S
+  // R HK 1979 o - O 21 3:30 0 -
+
+  using namespace std::literals::chrono_literals;
+  const std::chrono::time_zone* tz = std::chrono::locate_zone("Asia/Hong_Kong");
+
+  assert_equal(
+      std::chrono::local_info(
+          -1,
+          std::chrono::sys_info(
+              std::chrono::sys_seconds::min(),
+              to_sys_seconds(1904y, std::chrono::October, 29d, 17h),
+              7h + 36min + 42s,
+              0min,
+              "LMT"),
+          std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")),
+      tz->get_info(std::chrono::local_seconds::min()));
+
+  assert_equal(
+      std::chrono::local_info(
+          -1,
+          std::chrono::sys_info(
+              std::chrono::sys_seconds::min(),
+              to_sys_seconds(1904y, std::chrono::October, 29d, 17h),
+              7h + 36min + 42s,
+              0min,
+              "LMT"),
+          std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")),
+      tz->get_info(std::chrono::local_seconds::min() + 7h + 36min + 41s));
+
+  assert_equal(
+      std::chrono::local_info(
+          std::chrono::local_info::unique,
+          std::chrono::sys_info(
+              std::chrono::sys_seconds::min(),
+              to_sys_seconds(1904y, std::chrono::October, 29d, 17h),
+              7h + 36min + 42s,
+              0min,
+              "LMT"),
+          std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")),
+      tz->get_info(std::chrono::local_seconds::min() + 7h + 36min + 42s));
+
+  assert_equal(
+      std::chrono::local_info(
+          std::chrono::local_info::unique,
+          std::chrono::sys_info(
+              std::chrono::sys_seconds::min(),
+              to_sys_seconds(1904y, std::chrono::October, 29d, 17h),
+              7h + 36min + 42s,
+              0min,
+              "LMT"),
+
std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1904y, std::chrono::October, 30d, 0h, 36min, 41s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::nonexistent, + std::chrono::sys_info( + std::chrono::sys_seconds::min(), + to_sys_seconds(1904y, std::chrono::October, 29d, 17h), + 7h + 36min + 42s, + 0min, + "LMT"), + std::chrono::sys_info( + to_sys_seconds(1904y, std::chrono::October, 29d, 17h), + to_sys_seconds(1941y, std::chrono::June, 14d, 19h), + 8h, + 0min, + "HKT")), + tz->get_info(to_local_seconds(1904y, std::chrono::October, 30d, 0h, 36min, 42s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::nonexistent, + std::chrono::sys_info( + std::chrono::sys_seconds::min(), + to_sys_seconds(1904y, std::chrono::October, 29d, 17h), + 7h + 36min + 42s, + 0min, + "LMT"), + std::chrono::sys_info( + to_sys_seconds(1904y, std::chrono::October, 29d, 17h), + to_sys_seconds(1941y, std::chrono::June, 14d, 19h), + 8h, + 0min, + "HKT")), + tz->get_info(to_local_seconds(1904y, std::chrono::October, 30d, 0h, 59min, 59s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + to_sys_seconds(1904y, std::chrono::October, 29d, 17h), + to_sys_seconds(1941y, std::chrono::June, 14d, 19h), + 8h, + 0min, + "HKT"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1904y, std::chrono::October, 30d, 1h))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + to_sys_seconds(1904y, std::chrono::October, 29d, 17h), + to_sys_seconds(1941y, std::chrono::June, 14d, 19h), + 8h, + 0min, + "HKT"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1941y, std::chrono::June, 15d, 2h, 59min, 59s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::nonexistent, + std::chrono::sys_info( + to_sys_seconds(1904y, std::chrono::October, 29d, 17h), + to_sys_seconds(1941y, std::chrono::June, 14d, 19h), + 8h, + 0min, + "HKT"), + std::chrono::sys_info( + to_sys_seconds(1941y, std::chrono::June, 14d, 19h), + to_sys_seconds(1941y, std::chrono::September, 30d, 19h), + 9h, + 60min, + "HKST")), + tz->get_info(to_local_seconds(1941y, std::chrono::June, 15d, 3h))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::nonexistent, + std::chrono::sys_info( + to_sys_seconds(1904y, std::chrono::October, 29d, 17h), + to_sys_seconds(1941y, std::chrono::June, 14d, 19h), + 8h, + 0min, + "HKT"), + std::chrono::sys_info( + to_sys_seconds(1941y, std::chrono::June, 14d, 19h), + to_sys_seconds(1941y, std::chrono::September, 30d, 19h), + 9h, + 60min, + "HKST")), + tz->get_info(to_local_seconds(1941y, std::chrono::June, 15d, 3h, 59min, 59s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + to_sys_seconds(1941y, std::chrono::June, 14d, 19h), + to_sys_seconds(1941y, std::chrono::September, 30d, 19h), + 9h, + 60min, + "HKST"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1941y, std::chrono::June, 15d, 4h))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + to_sys_seconds(1941y, std::chrono::June, 14d, 19h), + 
to_sys_seconds(1941y, std::chrono::September, 30d, 19h),
+              9h,
+              60min,
+              "HKST"),
+          std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")),
+      tz->get_info(to_local_seconds(1941y, std::chrono::October, 1d, 3h, 29min, 29s)));
+
+  assert_equal(
+      std::chrono::local_info(
+          std::chrono::local_info::ambiguous,
+          std::chrono::sys_info(
+              to_sys_seconds(1941y, std::chrono::June, 14d, 19h),
+              to_sys_seconds(1941y, std::chrono::September, 30d, 19h),
+              9h,
+              60min,
+              "HKST"),
+          std::chrono::sys_info(
+              to_sys_seconds(1941y, std::chrono::September, 30d, 19h),
+              to_sys_seconds(1941y, std::chrono::December, 24d, 15h, 30min),
+              8h + 30min,
+              30min,
+              "HKWT")),
+      tz->get_info(to_local_seconds(1941y, std::chrono::October, 1d, 3h, 30min)));
+
+  assert_equal(
+      std::chrono::local_info(
+          std::chrono::local_info::ambiguous,
+          std::chrono::sys_info(
+              to_sys_seconds(1941y, std::chrono::June, 14d, 19h),
+              to_sys_seconds(1941y, std::chrono::September, 30d, 19h),
+              9h,
+              60min,
+              "HKST"),
+          std::chrono::sys_info(
+              to_sys_seconds(1941y, std::chrono::September, 30d, 19h),
+              to_sys_seconds(1941y, std::chrono::December, 24d, 15h, 30min),
+              8h + 30min,
+              30min,
+              "HKWT")),
+      tz->get_info(to_local_seconds(1941y, std::chrono::October, 1d, 3h, 59min, 59s)));
+
+  assert_equal(
+      std::chrono::local_info(
+          std::chrono::local_info::unique,
+          std::chrono::sys_info(
+              to_sys_seconds(1941y, std::chrono::September, 30d, 19h),
+              to_sys_seconds(1941y, std::chrono::December, 24d, 15h, 30min),
+              8h + 30min,
+              30min,
+              "HKWT"),
+          std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")),
+      tz->get_info(to_local_seconds(1941y, std::chrono::October, 1d, 4h)));
+}
+
+static void test_europe_berlin() {
+  // A more typical entry: first some hard-coded entries and then at the
+  // end a rules-based entry. This rule is valid for its entire period.
+  //
+
+  // Z Europe/Berlin 0:53:28 - LMT 1893 Ap
+  // 1 c CE%sT 1945 May 24 2
+  // 1 So CE%sT 1946
+  // 1 DE CE%sT 1980
+  // 1 E CE%sT
+  //
+  // R c 1916 o - Ap 30 23 1 S
+  // R c 1916 o - O 1 1 0 -
+  // R c 1917 1918 - Ap M>=15 2s 1 S
+  // R c 1917 1918 - S M>=15 2s 0 -
+  // R c 1940 o - Ap 1 2s 1 S
+  // R c 1942 o - N 2 2s 0 -
+  // R c 1943 o - Mar 29 2s 1 S
+  // R c 1943 o - O 4 2s 0 -
+  // R c 1944 1945 - Ap M>=1 2s 1 S
+  // R c 1944 o - O 2 2s 0 -
+  // R c 1945 o - S 16 2s 0 -
+  // R c 1977 1980 - Ap Su>=1 2s 1 S
+  // R c 1977 o - S lastSu 2s 0 -
+  // R c 1978 o - O 1 2s 0 -
+  // R c 1979 1995 - S lastSu 2s 0 -
+  // R c 1981 ma - Mar lastSu 2s 1 S
+  // R c 1996 ma - O lastSu 2s 0 -
+  //
+  // R So 1945 o - May 24 2 2 M
+  // R So 1945 o - S 24 3 1 S
+  // R So 1945 o - N 18 2s 0 -
+  //
+  // R DE 1946 o - Ap 14 2s 1 S
+  // R DE 1946 o - O 7 2s 0 -
+  // R DE 1947 1949 - O Su>=1 2s 0 -
+  // R DE 1947 o - Ap 6 3s 1 S
+  // R DE 1947 o - May 11 2s 2 M
+  // R DE 1947 o - Jun 29 3 1 S
+  // R DE 1948 o - Ap 18 2s 1 S
+  // R DE 1949 o - Ap 10 2s 1 S
+  //
+  // R E 1977 1980 - Ap Su>=1 1u 1 S
+  // R E 1977 o - S lastSu 1u 0 -
+  // R E 1978 o - O 1 1u 0 -
+  // R E 1979 1995 - S lastSu 1u 0 -
+  // R E 1981 ma - Mar lastSu 1u 1 S
+  // R E 1996 ma - O lastSu 1u 0 -
+  //
+  // Note the European Union decided to stop the seasonal change in
+  // 2021. In 2023 seasonal changes are still in effect.
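+  //
+  // The checks below concentrate on 1947, a year with "double summer time"
+  // (per the DE rules above): CET -> CEST on April 6 and CEST -> CEMT on
+  // May 11 each skip the local hour [03:00, 04:00), while CEMT -> CEST on
+  // June 29 and CEST -> CET on October 5 each repeat the local hour
+  // [02:00, 03:00).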
+ + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("Europe/Berlin"); + + assert_equal( + std::chrono::local_info( + -1, + std::chrono::sys_info( + std::chrono::sys_seconds::min(), + to_sys_seconds(1893y, std::chrono::March, 31d, 23h, 6min, 32s), + 53min + 28s, + 0min, + "LMT"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(std::chrono::local_seconds::min())); + + assert_equal( + std::chrono::local_info( + -1, + std::chrono::sys_info( + std::chrono::sys_seconds::min(), + to_sys_seconds(1893y, std::chrono::March, 31d, 23h, 6min, 32s), + 53min + 28s, + 0min, + "LMT"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(std::chrono::local_seconds::min() + 53min + 27s)); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + std::chrono::sys_seconds::min(), + to_sys_seconds(1893y, std::chrono::March, 31d, 23h, 6min, 32s), + 53min + 28s, + 0min, + "LMT"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(std::chrono::local_seconds::min() + 53min + 28s)); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + std::chrono::sys_seconds::min(), + to_sys_seconds(1893y, std::chrono::March, 31d, 23h, 6min, 32s), + 53min + 28s, + 0min, + "LMT"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1893y, std::chrono::March, 31d, 23h, 59min, 59s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + to_sys_seconds(1946y, std::chrono::October, 7d, 1h), + to_sys_seconds(1947y, std::chrono::April, 6d, 2h), + 1h, + 0min, + "CET"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1947y, std::chrono::April, 6d, 2h, 59min, 59s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::nonexistent, + std::chrono::sys_info( + to_sys_seconds(1946y, std::chrono::October, 7d, 1h), + to_sys_seconds(1947y, std::chrono::April, 6d, 2h), + 1h, + 0min, + "CET"), + std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::April, 6d, 2h), + to_sys_seconds(1947y, std::chrono::May, 11d, 1h), + 2h, + 60min, + "CEST")), + tz->get_info(to_local_seconds(1947y, std::chrono::April, 6d, 3h))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::nonexistent, + std::chrono::sys_info( + to_sys_seconds(1946y, std::chrono::October, 7d, 1h), + to_sys_seconds(1947y, std::chrono::April, 6d, 2h), + 1h, + 0min, + "CET"), + std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::April, 6d, 2h), + to_sys_seconds(1947y, std::chrono::May, 11d, 1h), + 2h, + 60min, + "CEST")), + tz->get_info(to_local_seconds(1947y, std::chrono::April, 6d, 3h, 59min, 59s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::April, 6d, 2h), + to_sys_seconds(1947y, std::chrono::May, 11d, 1h), + 2h, + 60min, + "CEST"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1947y, std::chrono::April, 6d, 4h))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + 
std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::April, 6d, 2h), + to_sys_seconds(1947y, std::chrono::May, 11d, 1h), + 2h, + 60min, + "CEST"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1947y, std::chrono::May, 11d, 2h, 59min, 59s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::nonexistent, + std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::April, 6d, 2h), + to_sys_seconds(1947y, std::chrono::May, 11d, 1h), + 2h, + 60min, + "CEST"), + std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::May, 11d, 1h), + to_sys_seconds(1947y, std::chrono::June, 29d), + 3h, + 120min, + "CEMT")), + tz->get_info(to_local_seconds(1947y, std::chrono::May, 11d, 3h))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::nonexistent, + std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::April, 6d, 2h), + to_sys_seconds(1947y, std::chrono::May, 11d, 1h), + 2h, + 60min, + "CEST"), + std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::May, 11d, 1h), + to_sys_seconds(1947y, std::chrono::June, 29d), + 3h, + 120min, + "CEMT")), + tz->get_info(to_local_seconds(1947y, std::chrono::May, 11d, 3h, 59min, 59s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::May, 11d, 1h), + to_sys_seconds(1947y, std::chrono::June, 29d), + 3h, + 120min, + "CEMT"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1947y, std::chrono::May, 11d, 4h))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::May, 11d, 1h), + to_sys_seconds(1947y, std::chrono::June, 29d), + 3h, + 120min, + "CEMT"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1947y, std::chrono::June, 29d, 1h, 59min, 59s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::ambiguous, + std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::May, 11d, 1h), + to_sys_seconds(1947y, std::chrono::June, 29d), + 3h, + 120min, + "CEMT"), + std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::June, 29d), + to_sys_seconds(1947y, std::chrono::October, 5d, 1h), + 2h, + 60min, + "CEST")), + tz->get_info(to_local_seconds(1947y, std::chrono::June, 29d, 2h))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::ambiguous, + std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::May, 11d, 1h), + to_sys_seconds(1947y, std::chrono::June, 29d), + 3h, + 120min, + "CEMT"), + std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::June, 29d), + to_sys_seconds(1947y, std::chrono::October, 5d, 1h), + 2h, + 60min, + "CEST")), + tz->get_info(to_local_seconds(1947y, std::chrono::June, 29d, 2h, 59min, 59s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::June, 29d), + to_sys_seconds(1947y, std::chrono::October, 5d, 1h), + 2h, + 60min, + "CEST"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1947y, std::chrono::June, 29d, 3h))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + 
to_sys_seconds(1947y, std::chrono::June, 29d), + to_sys_seconds(1947y, std::chrono::October, 5d, 1h), + 2h, + 60min, + "CEST"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1947y, std::chrono::October, 5d, 1h, 59min, 59s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::ambiguous, + std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::June, 29d), + to_sys_seconds(1947y, std::chrono::October, 5d, 1h), + 2h, + 60min, + "CEST"), + std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::October, 5d, 1h), + to_sys_seconds(1948y, std::chrono::April, 18d, 1h), + 1h, + 0min, + "CET")), + tz->get_info(to_local_seconds(1947y, std::chrono::October, 5d, 2h))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::ambiguous, + std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::June, 29d), + to_sys_seconds(1947y, std::chrono::October, 5d, 1h), + 2h, + 60min, + "CEST"), + std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::October, 5d, 1h), + to_sys_seconds(1948y, std::chrono::April, 18d, 1h), + 1h, + 0min, + "CET")), + tz->get_info(to_local_seconds(1947y, std::chrono::October, 5d, 2h, 59min, 59s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + to_sys_seconds(1947y, std::chrono::October, 5d, 1h), + to_sys_seconds(1948y, std::chrono::April, 18d, 1h), + 1h, + 0min, + "CET"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1947y, std::chrono::October, 5d, 3h))); +} + +static void test_europe_dublin() { + // Z Europe/Dublin -0:25:21 - LMT 1880 Au 2 + // -0:25:21 - DMT 1916 May 21 2s + // -0:25:21 1 IST 1916 O 1 2s + // 0 G %s 1921 D 6 + // ... + // + // R G 1916 o - May 21 2s 1 BST + // R G 1916 o - O 1 2s 0 GMT + // R G 1917 o - Ap 8 2s 1 BST + // ... 
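+  //
+  // Derived from the data above, the transitions probed below: on 1916 May 21
+  // the clocks jump from DMT (UTC-0:25:21) to IST (UTC+0:34:39), so local
+  // times in [02:00:00, 03:00:00) do not exist; on 1916 October 1 IST falls
+  // back to GMT (UTC+0), so local times in [02:25:21, 03:00:00) occur twice.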
+ + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("Europe/Dublin"); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + std::chrono::sys_seconds::min(), + to_sys_seconds(1880y, std::chrono::August, 2d, 0h, 25min, 21s), + -(25min + 21s), + 0min, + "LMT"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(std::chrono::local_seconds::min())); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + std::chrono::sys_seconds::min(), + to_sys_seconds(1880y, std::chrono::August, 2d, 0h, 25min, 21s), + -(25min + 21s), + 0min, + "LMT"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1880y, std::chrono::August, 1d, 23h, 59min, 59s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + to_sys_seconds(1880y, std::chrono::August, 2d, 0h, 25min, 21s), + to_sys_seconds(1916y, std::chrono::May, 21d, 2h, 25min, 21s), + -(25min + 21s), + 0min, + "DMT"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1880y, std::chrono::August, 2d))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + to_sys_seconds(1880y, std::chrono::August, 2d, 0h, 25min, 21s), + to_sys_seconds(1916y, std::chrono::May, 21d, 2h, 25min, 21s), + -(25min + 21s), + 0min, + "DMT"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1916y, std::chrono::May, 21d, 1h, 59min, 59s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::nonexistent, + std::chrono::sys_info( + to_sys_seconds(1880y, std::chrono::August, 2d, 0h, 25min, 21s), + to_sys_seconds(1916y, std::chrono::May, 21d, 2h, 25min, 21s), + -(25min + 21s), + 0min, + "DMT"), + std::chrono::sys_info( + to_sys_seconds(1916y, std::chrono::May, 21d, 2h, 25min, 21s), + to_sys_seconds(1916y, std::chrono::October, 1d, 02h, 25min, 21s), + 34min + 39s, + 60min, + "IST")), + tz->get_info(to_local_seconds(1916y, std::chrono::May, 21d, 2h))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::nonexistent, + std::chrono::sys_info( + to_sys_seconds(1880y, std::chrono::August, 2d, 0h, 25min, 21s), + to_sys_seconds(1916y, std::chrono::May, 21d, 2h, 25min, 21s), + -(25min + 21s), + 0min, + "DMT"), + std::chrono::sys_info( + to_sys_seconds(1916y, std::chrono::May, 21d, 2h, 25min, 21s), + to_sys_seconds(1916y, std::chrono::October, 1d, 02h, 25min, 21s), + 34min + 39s, + 60min, + "IST")), + tz->get_info(to_local_seconds(1916y, std::chrono::May, 21d, 2h, 59min, 59s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + to_sys_seconds(1916y, std::chrono::May, 21d, 2h, 25min, 21s), + to_sys_seconds(1916y, std::chrono::October, 1d, 02h, 25min, 21s), + 34min + 39s, + 60min, + "IST"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1916y, std::chrono::May, 21d, 6h))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + to_sys_seconds(1916y, std::chrono::May, 21d, 2h, 25min, 21s), + 
to_sys_seconds(1916y, std::chrono::October, 1d, 02h, 25min, 21s),
+              34min + 39s,
+              60min,
+              "IST"),
+          std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")),
+      tz->get_info(to_local_seconds(1916y, std::chrono::October, 1d, 2h, 25min, 20s)));
+
+  assert_equal(
+      std::chrono::local_info(
+          std::chrono::local_info::ambiguous,
+          std::chrono::sys_info(
+              to_sys_seconds(1916y, std::chrono::May, 21d, 2h, 25min, 21s),
+              to_sys_seconds(1916y, std::chrono::October, 1d, 02h, 25min, 21s),
+              34min + 39s,
+              60min,
+              "IST"),
+          std::chrono::sys_info(
+              to_sys_seconds(1916y, std::chrono::October, 1d, 02h, 25min, 21s),
+              to_sys_seconds(1917y, std::chrono::April, 8d, 2h),
+              0s,
+              0min,
+              "GMT")),
+      tz->get_info(to_local_seconds(1916y, std::chrono::October, 1d, 2h, 59min, 59s)));
+
+  assert_equal(
+      std::chrono::local_info(
+          std::chrono::local_info::unique,
+          std::chrono::sys_info(
+              to_sys_seconds(1916y, std::chrono::October, 1d, 02h, 25min, 21s),
+              to_sys_seconds(1917y, std::chrono::April, 8d, 2h),
+              0s,
+              0min,
+              "GMT"),
+          std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")),
+      tz->get_info(to_local_seconds(1916y, std::chrono::October, 1d, 3h)));
+
+  assert_equal(
+      std::chrono::local_info(
+          std::chrono::local_info::unique,
+          std::chrono::sys_info(
+              to_sys_seconds(1916y, std::chrono::October, 1d, 02h, 25min, 21s),
+              to_sys_seconds(1917y, std::chrono::April, 8d, 2h),
+              0s,
+              0min,
+              "GMT"),
+          std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")),
+      tz->get_info(to_local_seconds(1917y, std::chrono::April, 8d, 1h, 59min, 59s)));
+}
+
+static void test_america_st_johns() {
+  // A more typical entry.
+  // Uses letters both when DST is active and when it is not, and has multiple
+  // letters. Uses negative offsets.
+  // Switches several times between its own and Canadian rules.
+  // Switches the stdoff from -3:30:52 to -3:30 while observing the same rule.
+
+  // Z America/St_Johns -3:30:52 - LMT 1884
+  // -3:30:52 j N%sT 1918
+  // -3:30:52 C N%sT 1919
+  // ...
+  //
+  // R j 1917 o - Ap 8 2 1 D
+  // R j 1917 o - S 17 2 0 S
+  // R j 1919 o - May 5 23 1 D
+  // R j 1919 o - Au 12 23 0 S
+  // R j 1920 1935 - May Su>=1 23 1 D
+  // ...
+  //
+  // R C 1918 o - Ap 14 2 1 D
+  // R C 1918 o - O 27 2 0 S
+  // R C 1942 o - F 9 2 1 W
+  // ...
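+  //
+  // The checks below probe the 1917 Newfoundland rule ("R j 1917" above): the
+  // spring-forward on April 8 skips the local hour [02:00, 03:00), and the
+  // fall-back on September 17 repeats the local hour [01:00, 02:00), both on
+  // top of the unusual -3:30:52 standard offset.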
+ + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("America/St_Johns"); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + std::chrono::sys_seconds::min(), + to_sys_seconds(1884y, std::chrono::January, 1d, 3h, 30min, 52s), + -(3h + 30min + 52s), + 0min, + "LMT"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(std::chrono::local_seconds::min())); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + std::chrono::sys_seconds::min(), + to_sys_seconds(1884y, std::chrono::January, 1d, 3h, 30min, 52s), + -(3h + 30min + 52s), + 0min, + "LMT"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1883y, std::chrono::December, 31d, 23h, 59min, 59s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + to_sys_seconds(1884y, std::chrono::January, 1d, 3h, 30min, 52s), + to_sys_seconds(1917y, std::chrono::April, 8d, 5h, 30min, 52s), + -(3h + 30min + 52s), + 0min, + "NST"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1884y, std::chrono::January, 1d))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + to_sys_seconds(1884y, std::chrono::January, 1d, 3h, 30min, 52s), + to_sys_seconds(1917y, std::chrono::April, 8d, 5h, 30min, 52s), + -(3h + 30min + 52s), + 0min, + "NST"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1917y, std::chrono::April, 8d, 1h, 59min, 59s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::nonexistent, + std::chrono::sys_info( + to_sys_seconds(1884y, std::chrono::January, 1d, 3h, 30min, 52s), + to_sys_seconds(1917y, std::chrono::April, 8d, 5h, 30min, 52s), + -(3h + 30min + 52s), + 0min, + "NST"), + std::chrono::sys_info( + to_sys_seconds(1917y, std::chrono::April, 8d, 5h, 30min, 52s), + to_sys_seconds(1917y, std::chrono::September, 17d, 4h, 30min, 52s), + -(2h + 30min + 52s), + 60min, + "NDT")), + tz->get_info(to_local_seconds(1917y, std::chrono::April, 8d, 2h))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::nonexistent, + std::chrono::sys_info( + to_sys_seconds(1884y, std::chrono::January, 1d, 3h, 30min, 52s), + to_sys_seconds(1917y, std::chrono::April, 8d, 5h, 30min, 52s), + -(3h + 30min + 52s), + 0min, + "NST"), + std::chrono::sys_info( + to_sys_seconds(1917y, std::chrono::April, 8d, 5h, 30min, 52s), + to_sys_seconds(1917y, std::chrono::September, 17d, 4h, 30min, 52s), + -(2h + 30min + 52s), + 60min, + "NDT")), + tz->get_info(to_local_seconds(1917y, std::chrono::April, 8d, 2h, 59min, 59s))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + std::chrono::sys_info( + to_sys_seconds(1917y, std::chrono::April, 8d, 5h, 30min, 52s), + to_sys_seconds(1917y, std::chrono::September, 17d, 4h, 30min, 52s), + -(2h + 30min + 52s), + 60min, + "NDT"), + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + tz->get_info(to_local_seconds(1917y, std::chrono::April, 8d, 3h))); + + assert_equal( + std::chrono::local_info( + std::chrono::local_info::unique, + 
std::chrono::sys_info(
+              to_sys_seconds(1917y, std::chrono::April, 8d, 5h, 30min, 52s),
+              to_sys_seconds(1917y, std::chrono::September, 17d, 4h, 30min, 52s),
+              -(2h + 30min + 52s),
+              60min,
+              "NDT"),
+          std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")),
+      tz->get_info(to_local_seconds(1917y, std::chrono::September, 17d, 0h, 59min, 59s)));
+
+  assert_equal(
+      std::chrono::local_info(
+          std::chrono::local_info::ambiguous,
+          std::chrono::sys_info(
+              to_sys_seconds(1917y, std::chrono::April, 8d, 5h, 30min, 52s),
+              to_sys_seconds(1917y, std::chrono::September, 17d, 4h, 30min, 52s),
+              -(2h + 30min + 52s),
+              60min,
+              "NDT"),
+          std::chrono::sys_info(
+              to_sys_seconds(1917y, std::chrono::September, 17d, 4h, 30min, 52s),
+              to_sys_seconds(1918y, std::chrono::April, 14d, 5h, 30min, 52s),
+              -(3h + 30min + 52s),
+              0min,
+              "NST")),
+      tz->get_info(to_local_seconds(1917y, std::chrono::September, 17d, 1h)));
+
+  assert_equal(
+      std::chrono::local_info(
+          std::chrono::local_info::ambiguous,
+          std::chrono::sys_info(
+              to_sys_seconds(1917y, std::chrono::April, 8d, 5h, 30min, 52s),
+              to_sys_seconds(1917y, std::chrono::September, 17d, 4h, 30min, 52s),
+              -(2h + 30min + 52s),
+              60min,
+              "NDT"),
+          std::chrono::sys_info(
+              to_sys_seconds(1917y, std::chrono::September, 17d, 4h, 30min, 52s),
+              to_sys_seconds(1918y, std::chrono::April, 14d, 5h, 30min, 52s),
+              -(3h + 30min + 52s),
+              0min,
+              "NST")),
+      tz->get_info(to_local_seconds(1917y, std::chrono::September, 17d, 1h, 59min, 59s)));
+
+  assert_equal(
+      std::chrono::local_info(
+          std::chrono::local_info::unique,
+          std::chrono::sys_info(
+              to_sys_seconds(1917y, std::chrono::September, 17d, 4h, 30min, 52s),
+              to_sys_seconds(1918y, std::chrono::April, 14d, 5h, 30min, 52s),
+              -(3h + 30min + 52s),
+              0min,
+              "NST"),
+          std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")),
+      tz->get_info(to_local_seconds(1917y, std::chrono::September, 17d, 2h)));
+}
+
+static void validate_transitions(const std::chrono::time_zone& zone) {
+  using namespace std::literals::chrono_literals;
+
+  constexpr auto begin = std::chrono::time_point_cast<std::chrono::seconds>(
+      static_cast<std::chrono::sys_days>(std::chrono::year_month_day{first, std::chrono::January, 1d}));
+  constexpr auto end = std::chrono::time_point_cast<std::chrono::seconds>(
+      static_cast<std::chrono::sys_days>(std::chrono::year_month_day{last, std::chrono::January, 1d}));
+
+  // Builds the set of sys_info objects for the selected time range.
+  std::vector<std::chrono::sys_info> input;
+  std::chrono::sys_seconds s = begin;
+  do {
+    input.emplace_back(zone.get_info(s));
+    s = input.back().end;
+  } while (s < end);
+
+  for (auto previous = input.begin(), next = previous + 1; next != input.end(); ++previous, ++next) {
+    // Now iterate over all adjacent pairs of objects.
+    // For every transition get the local time of
+    // - the end of the first (a)
+    // - the start of the second (b)
+    // Depending on the difference between 'a' and 'b', different tests are done.
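+    // For instance (offsets assumed purely for illustration): for a +01 -> +02
+    // spring-forward at the system time 01:00, 'a' is 02:00 local and 'b' is
+    // 03:00 local, so a < b and the nonexistent branch below checks both edges
+    // of the skipped range [02:00, 03:00).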
+ std::chrono::local_seconds end_previous{previous->end.time_since_epoch() + previous->offset}; + std::chrono::local_seconds begin_next{next->begin.time_since_epoch() + next->offset}; + + if (end_previous == begin_next) { + // unique transition + // a |------------| + // b |----------| + // T + assert_equal(std::chrono::local_info( + std::chrono::local_info::unique, + *previous, + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + zone.get_info(end_previous - 1s)); + + assert_equal(std::chrono::local_info( + std::chrono::local_info::unique, + *next, + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + zone.get_info(begin_next)); + + } else if (end_previous < begin_next) { + // non-existent transition + // a |------------| + // b |----------| + // T T + assert_equal(std::chrono::local_info( + std::chrono::local_info::unique, + *previous, + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + zone.get_info(end_previous - 1s)); + + assert_equal(std::chrono::local_info(std::chrono::local_info::nonexistent, *previous, *next), + zone.get_info(end_previous)); + + assert_equal(std::chrono::local_info(std::chrono::local_info::nonexistent, *previous, *next), + zone.get_info(begin_next - 1s)); + + assert_equal(std::chrono::local_info( + std::chrono::local_info::unique, + *next, + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + zone.get_info(begin_next)); + + } else { + // ambiguous transition + // a |------------| + // b |----------| + // T T + assert_equal(std::chrono::local_info( + std::chrono::local_info::unique, + *previous, + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + zone.get_info(begin_next - 1s)); + + assert_equal(std::chrono::local_info(std::chrono::local_info::ambiguous, *previous, *next), + zone.get_info(begin_next)); + + assert_equal(std::chrono::local_info(std::chrono::local_info::ambiguous, *previous, *next), + zone.get_info(end_previous - 1s)); + + assert_equal(std::chrono::local_info( + std::chrono::local_info::unique, + *next, + std::chrono::sys_info(std::chrono::sys_seconds(0s), std::chrono::sys_seconds(0s), 0s, 0min, "")), + zone.get_info(end_previous)); + } + } +} + +int main(int, const char**) { + test_gmt(); + test_local_time_out_of_range(); + test_indian_kerguelen(); + test_antarctica_rothera(); + + test_asia_hong_kong(); + test_europe_berlin(); + test_europe_dublin(); + test_america_st_johns(); + + const std::chrono::tzdb& tzdb = std::chrono::get_tzdb(); + for (const auto& zone : tzdb.zones) { + validate_transitions(zone); + } + + return 0; +} -- cgit v1.1 From f7ccb320859b9e341f74dc0cd7791a3b871eb937 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 9 Jun 2024 09:32:52 -0700 Subject: [Instrumentation] Remove an extraneous ArrayRef (NFC) (#94890) We can implicitly convert RemainingVDs to an ArrayRef. Note that RemainingVDs is of type SmallVector. 
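For illustration (a sketch, not part of the patch; the element type and the
inline size below are recalled from the surrounding code and should be treated
as assumptions), the call relies on ArrayRef's implicit converting constructor
from SmallVector:

  SmallVector<InstrProfValueData, 24> RemainingVDs; // as declared in this file
  ArrayRef<InstrProfValueData> VDs = RemainingVDs;  // implicit conversion

so the explicit temporary ArrayRef added no information.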
--- llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp index fd0f69e..fa93f4b 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -402,8 +402,7 @@ bool MemOPSizeOpt::perform(MemOp MO) { // If all promoted, we don't need the MD.prof metadata. if (SavedRemainCount > 0 || Version != NumVals) { // Otherwise we need update with the un-promoted records back. - ArrayRef RemVDs(RemainingVDs); - annotateValueSite(*Func.getParent(), *MO.I, RemVDs, SavedRemainCount, + annotateValueSite(*Func.getParent(), *MO.I, RemainingVDs, SavedRemainCount, IPVK_MemOPSize, NumVals); } -- cgit v1.1 From e090bac638e56ff9db87e622cdf925f2b99dfc30 Mon Sep 17 00:00:00 2001 From: Matheus Izvekov Date: Sat, 8 Jun 2024 20:13:30 -0300 Subject: [clang] NFC: add new cwg2398 tests --- clang/test/SemaTemplate/cwg2398.cpp | 112 ++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/clang/test/SemaTemplate/cwg2398.cpp b/clang/test/SemaTemplate/cwg2398.cpp index f7f69e9..7675d42 100644 --- a/clang/test/SemaTemplate/cwg2398.cpp +++ b/clang/test/SemaTemplate/cwg2398.cpp @@ -200,8 +200,120 @@ namespace consistency { template struct A, B, B>; // new-error@-1 {{ambiguous partial specializations}} } // namespace t2 + namespace t3 { + template struct A; + + template class TT1, + class T1, class T2, class T3, class T4> + struct A, TT1, typename nondeduced>::type> {}; + // new-note@-1 {{partial specialization matches}} + + template class UU1, + class U1, class U2> + struct A, UU1, typename nondeduced>::type>; + // new-note@-1 {{partial specialization matches}} + + template struct A, B, B>; + // new-error@-1 {{ambiguous partial specializations}} + } // namespace t3 + namespace t4 { + template struct A; + + template class TT1, + class T1, class T2, class T3, class T4> + struct A, TT1, typename nondeduced>::type> {}; + // new-note@-1 {{partial specialization matches}} + + template class UU1, + class U1, class U2> + struct A, UU1, typename nondeduced>::type>; + // new-note@-1 {{partial specialization matches}} + + template struct A, B, B>; + // new-error@-1 {{ambiguous partial specializations}} + } // namespace t4 + namespace t5 { + template struct A; + + template class TT1, + class T1, class T2, class T3, class T4> + struct A, TT1> {}; + // new-note@-1 {{partial specialization matches}} + + template class UU1, + class U1, class U2> + struct A, UU1>; + // new-note@-1 {{partial specialization matches}} + + template struct A, B>; + // new-error@-1 {{ambiguous partial specializations}} + } // namespace t5 + namespace t6 { + template struct A; + + template class TT1, + class T1, class T2, class T3> + struct A, TT1> {}; + // new-note@-1 {{partial specialization matches}} + + template class UU1, + class U1, class U2> + struct A, UU1>; + // new-note@-1 {{partial specialization matches}} + + template struct A, B>; + // new-error@-1 {{ambiguous partial specializations}} + } // namespace t6 } // namespace consistency +namespace classes { + namespace canon { + template struct A {}; + + template class TT> auto f(TT a) { return a; } + // old-note@-1 2{{template template argument has different template parameters}} + // new-note@-2 2{{substitution failure: too few template arguments}} + + A v1; + A v2; + + using X = decltype(f(v1)); + // expected-error@-1 {{no 
matching function for call}} + + using X = decltype(f(v2)); + // expected-error@-1 {{no matching function for call}} + } // namespace canon + namespace expr { + template struct A { + static constexpr auto val = E1; + }; + template