diff options
300 files changed, 14337 insertions, 1588 deletions
diff --git a/.ci/premerge_advisor_explain.py b/.ci/premerge_advisor_explain.py new file mode 100644 index 0000000..06c6cb9 --- /dev/null +++ b/.ci/premerge_advisor_explain.py @@ -0,0 +1,63 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +"""Script for getting explanations from the premerge advisor.""" + +import argparse +import os +import platform +import sys + +import requests + +import generate_test_report_lib + +PREMERGE_ADVISOR_URL = ( + "http://premerge-advisor.premerge-advisor.svc.cluster.local:5000/explain" +) + + +def main(commit_sha: str, build_log_files: list[str]): + junit_objects, ninja_logs = generate_test_report_lib.load_info_from_files( + build_log_files + ) + test_failures = generate_test_report_lib.get_failures(junit_objects) + current_platform = f"{platform.system()}-{platform.machine()}".lower() + explanation_request = { + "base_commit_sha": commit_sha, + "platform": current_platform, + "failures": [], + } + if test_failures: + for _, failures in test_failures.items(): + for name, failure_messsage in failures: + explanation_request["failures"].append( + {"name": name, "message": failure_messsage} + ) + else: + ninja_failures = generate_test_report_lib.find_failure_in_ninja_logs(ninja_logs) + for name, failure_message in ninja_failures: + explanation_request["failures"].append( + {"name": name, "message": failure_message} + ) + advisor_response = requests.get(PREMERGE_ADVISOR_URL, json=explanation_request) + if advisor_response.status_code == 200: + print(advisor_response.json()) + else: + print(advisor_response.reason) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("commit_sha", help="The base commit SHA for the test.") + parser.add_argument( + "build_log_files", help="Paths to JUnit report files and ninja logs.", nargs="*" + ) + args = parser.parse_args() + + # Skip looking for results on AArch64 for now because the premerge advisor + # service is not available on AWS currently. + if platform.machine() == "arm64": + sys.exit(0) + + main(args.commit_sha, args.build_log_files) diff --git a/.ci/utils.sh b/.ci/utils.sh index 375ed29..dc8ce9b 100644 --- a/.ci/utils.sh +++ b/.ci/utils.sh @@ -43,6 +43,11 @@ function at-exit { python "${MONOREPO_ROOT}"/.ci/premerge_advisor_upload.py \ $(git rev-parse HEAD~1) $GITHUB_RUN_NUMBER \ "${BUILD_DIR}"/test-results.*.xml "${MONOREPO_ROOT}"/ninja*.log + if [[ "$GITHUB_ACTIONS" != "" ]]; then + python "${MONOREPO_ROOT}"/.ci/premerge_advisor_explain.py \ + $(git rev-parse HEAD~1) "${BUILD_DIR}"/test-results.*.xml \ + "${MONOREPO_ROOT}"/ninja*.log + fi fi } trap at-exit EXIT diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 64fb60a4..3a0a291 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -53,6 +53,13 @@ /mlir/include/mlir/Interfaces/DestinationStyleOpInterface.* @matthias-springer /mlir/lib/Interfaces/DestinationStyleOpInterface.* @matthias-springer +# AMDGPU and ROCDL dialects in MLIR. +/mlir/include/mlir/Dialect/AMDGPU @krzysz00 @kuhar +/mlir/lib/Dialect/AMDGPU @krzysz00 @kuhar +/mlir/lib/Conversion/*AMDGPU* @krzysz00 @kuhar +/mlir/lib/Conversion/*ToROCDL @krzysz00 @kuhar +/mlir/include/mlir/Dialect/LLVMIR/ROCDL* @krzysz00 @kuhar + # Bufferization Dialect in MLIR. /mlir/include/mlir/Dialect/Bufferization @matthias-springer /mlir/lib/Dialect/Bufferization @matthias-springer diff --git a/.github/workflows/pr-code-lint.yml b/.github/workflows/pr-code-lint.yml index 776ec4a..e67b518 100644 --- a/.github/workflows/pr-code-lint.yml +++ b/.github/workflows/pr-code-lint.yml @@ -20,7 +20,7 @@ jobs: run: shell: bash container: - image: 'ghcr.io/llvm/ci-ubuntu-24.04:latest' + image: 'ghcr.io/llvm/ci-ubuntu-24.04-lint' timeout-minutes: 60 concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -31,6 +31,11 @@ jobs: with: fetch-depth: 2 + # FIXME: same as in ".github/workflows/pr-code-format.yml" + - name: Set Safe Directory + run: | + chown -R root $(pwd) + - name: Get changed files id: changed-files uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5 @@ -46,22 +51,6 @@ jobs: run: | echo "Changed files:" echo "$CHANGED_FILES" - - # The clang tidy version should always be upgraded to the first version - # of a release cycle (x.1.0) or the last version of a release cycle, or - # if there have been relevant clang-format backports. - - name: Install clang-tidy - uses: aminya/setup-cpp@a276e6e3d1db9160db5edc458e99a30d3b109949 # v1.7.1 - with: - clang-tidy: 21.1.0 - - - name: Setup Python env - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: '3.13' - - - name: Install Python dependencies - run: python3 -m pip install -r llvm/utils/git/requirements_linting.txt # TODO: create special mapping for 'codegen' targets, for now build predefined set # TODO: add entrypoint in 'compute_projects.py' that only adds a project and its direct dependencies diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 9580163..6fa66ab 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -2665,8 +2665,9 @@ void RewriteInstance::readRelocations(const SectionRef &Section) { return; } const bool SkipRelocs = StringSwitch<bool>(RelocatedSectionName) - .Cases(".plt", ".rela.plt", ".got.plt", - ".eh_frame", ".gcc_except_table", true) + .Cases({".plt", ".rela.plt", ".got.plt", + ".eh_frame", ".gcc_except_table"}, + true) .Default(false); if (SkipRelocs) { LLVM_DEBUG( diff --git a/clang-tools-extra/clangd/unittests/lit.cfg.py b/clang-tools-extra/clangd/unittests/lit.cfg.py index 33aa9e6..666e987 100644 --- a/clang-tools-extra/clangd/unittests/lit.cfg.py +++ b/clang-tools-extra/clangd/unittests/lit.cfg.py @@ -19,12 +19,12 @@ config.parallelism_group = "clangd" if platform.system() == "Darwin": shlibpath_var = "DYLD_LIBRARY_PATH" -elif platform.system() == "Windows": +elif platform.system() == "Windows" or sys.platform == "cygwin": shlibpath_var = "PATH" else: shlibpath_var = "LD_LIBRARY_PATH" config.environment[shlibpath_var] = os.path.pathsep.join( - ("@SHLIBDIR@", "@LLVM_LIBS_DIR@", config.environment.get(shlibpath_var, "")) + (config.shlibdir, config.llvm_libs_dir, config.environment.get(shlibpath_var, "")) ) # It is not realistically possible to account for all options that could diff --git a/clang-tools-extra/include-cleaner/test/Unit/lit.cfg.py b/clang-tools-extra/include-cleaner/test/Unit/lit.cfg.py index 0963351..c4454df 100644 --- a/clang-tools-extra/include-cleaner/test/Unit/lit.cfg.py +++ b/clang-tools-extra/include-cleaner/test/Unit/lit.cfg.py @@ -11,12 +11,12 @@ import platform if platform.system() == "Darwin": shlibpath_var = "DYLD_LIBRARY_PATH" -elif platform.system() == "Windows": +elif platform.system() == "Windows" or sys.platform == "cygwin": shlibpath_var = "PATH" else: shlibpath_var = "LD_LIBRARY_PATH" config.environment[shlibpath_var] = os.path.pathsep.join( - ("@SHLIBDIR@", "@LLVM_LIBS_DIR@", config.environment.get(shlibpath_var, "")) + (config.shlibdir, config.llvm_libs_dir, config.environment.get(shlibpath_var, "")) ) # It is not realistically possible to account for all options that could diff --git a/clang-tools-extra/test/Unit/lit.cfg.py b/clang-tools-extra/test/Unit/lit.cfg.py index b7376a0..0254829 100644 --- a/clang-tools-extra/test/Unit/lit.cfg.py +++ b/clang-tools-extra/test/Unit/lit.cfg.py @@ -21,7 +21,7 @@ config.test_format = lit.formats.GoogleTest(".", "Tests") if platform.system() == "Darwin": shlibpath_var = "DYLD_LIBRARY_PATH" -elif platform.system() == "Windows": +elif platform.system() == "Windows" or sys.platform == "cygwin": shlibpath_var = "PATH" else: shlibpath_var = "LD_LIBRARY_PATH" diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index d03c778..54b3ce0 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -185,7 +185,8 @@ let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in def cvttss2si : X86Builtin<"int(_Vector<4, float>)">; } -let Features = "sse", Attributes = [NoThrow, RequiredVectorWidth<128>] in { +let Features = "sse", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def movmskps : X86Builtin<"int(_Vector<4, float>)">; } @@ -211,11 +212,6 @@ let Features = "sse2", Attributes = [NoThrow, RequiredVectorWidth<128>] in { def maskmovdqu : X86Builtin<"void(_Vector<16, char>, _Vector<16, char>, char *)">; } -let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def movmskpd : X86Builtin<"int(_Vector<2, double>)">; - def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">; -} - let Features = "sse2", Attributes = [NoThrow] in { def movnti : X86Builtin<"void(int *, int)">; } @@ -224,6 +220,8 @@ let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">; def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">; def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">; + def movmskpd : X86Builtin<"int(_Vector<2, double>)">; + def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">; } let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { @@ -334,8 +332,8 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, " "_Vector<2,double>, _Constant char)">; - def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">; - def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">; + def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, " + "_Vector<16, char>, _Constant char)">; } let Features = "sse4.1", @@ -358,6 +356,7 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVector def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">; def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">; + def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">; def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">; def vec_set_v16qi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, char, _Constant int)">; @@ -498,9 +497,6 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">; def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">; def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">; - def vextractf128_pd256 : X86Builtin<"_Vector<2, double>(_Vector<4, double>, _Constant int)">; - def vextractf128_ps256 : X86Builtin<"_Vector<4, float>(_Vector<8, float>, _Constant int)">; - def vextractf128_si256 : X86Builtin<"_Vector<4, int>(_Vector<8, int>, _Constant int)">; def cvtpd2ps256 : X86Builtin<"_Vector<4, float>(_Vector<4, double>)">; def cvtps2dq256 : X86Builtin<"_Vector<8, int>(_Vector<8, float>)">; def cvttpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">; @@ -521,6 +517,9 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def blendps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">; def blendvpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">; def blendvps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">; + def vextractf128_pd256 : X86Builtin<"_Vector<2, double>(_Vector<4, double>, _Constant int)">; + def vextractf128_ps256 : X86Builtin<"_Vector<4, float>(_Vector<8, float>, _Constant int)">; + def vextractf128_si256 : X86Builtin<"_Vector<4, int>(_Vector<8, int>, _Constant int)">; def vinsertf128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">; def vinsertf128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">; def vinsertf128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">; @@ -562,11 +561,8 @@ let Features = "avx", def vtestnzcps256 : X86Builtin<"int(_Vector<8, float>, _Vector<8, float>)">; def ptestz256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">; def ptestc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">; - def ptestnzc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">; -} - -let Features = "avx", - Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { + def ptestnzc256 + : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">; def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">; def movmskps256 : X86Builtin<"int(_Vector<8, float>)">; } @@ -605,9 +601,8 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">; - def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">; - - def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">; + def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, " + "_Vector<32, char>, _Constant int)">; def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; @@ -627,11 +622,11 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">; def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">; - def extract128i256 : X86Builtin<"_Vector<2, long long int>(_Vector<4, long long int>, _Constant int)">; } let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">; def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">; def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">; @@ -694,6 +689,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def psrlv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; def psllv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; def psrlv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; + def extract128i256 : X86Builtin<"_Vector<2, long long int>(_Vector<4, long long int>, _Constant int)">; } let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in { @@ -1095,7 +1091,7 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256 def alignq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def extractf64x4_mask : X86Builtin<"_Vector<4, double>(_Vector<8, double>, _Constant int, _Vector<4, double>, unsigned char)">; def extractf32x4_mask : X86Builtin<"_Vector<4, float>(_Vector<16, float>, _Constant int, _Vector<4, float>, unsigned char)">; } @@ -2960,24 +2956,24 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in { def pmovqw256mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<4, long long int>, unsigned char)">; } -let Features = "avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def extractf32x8_mask : X86Builtin<"_Vector<8, float>(_Vector<16, float>, _Constant int, _Vector<8, float>, unsigned char)">; def extractf64x2_512_mask : X86Builtin<"_Vector<2, double>(_Vector<8, double>, _Constant int, _Vector<2, double>, unsigned char)">; def extracti32x8_mask : X86Builtin<"_Vector<8, int>(_Vector<16, int>, _Constant int, _Vector<8, int>, unsigned char)">; def extracti64x2_512_mask : X86Builtin<"_Vector<2, long long int>(_Vector<8, long long int>, _Constant int, _Vector<2, long long int>, unsigned char)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def extracti32x4_mask : X86Builtin<"_Vector<4, int>(_Vector<16, int>, _Constant int, _Vector<4, int>, unsigned char)">; def extracti64x4_mask : X86Builtin<"_Vector<4, long long int>(_Vector<8, long long int>, _Constant int, _Vector<4, long long int>, unsigned char)">; } -let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def extractf64x2_256_mask : X86Builtin<"_Vector<2, double>(_Vector<4, double>, _Constant int, _Vector<2, double>, unsigned char)">; def extracti64x2_256_mask : X86Builtin<"_Vector<2, long long int>(_Vector<4, long long int>, _Constant int, _Vector<2, long long int>, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def extractf32x4_256_mask : X86Builtin<"_Vector<4, float>(_Vector<8, float>, _Constant int, _Vector<4, float>, unsigned char)">; def extracti32x4_256_mask : X86Builtin<"_Vector<4, int>(_Vector<8, int>, _Constant int, _Vector<4, int>, unsigned char)">; } diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 22de85d..5ff4cc4 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -13676,6 +13676,9 @@ def err_acc_reduction_recipe_no_op "not have a valid operation available">; def note_acc_reduction_recipe_noop_field : Note<"while forming combiner for compound type %0">; +def note_acc_reduction_combiner_forming + : Note<"while forming %select{|binary operator '%1'|conditional " + "operator|final assignment operator}0">; // AMDGCN builtins diagnostics def err_amdgcn_load_lds_size_invalid_value : Error<"invalid size value">; diff --git a/clang/include/clang/Driver/Distro.h b/clang/include/clang/Driver/Distro.h index a515cbf..0e17b30 100644 --- a/clang/include/clang/Driver/Distro.h +++ b/clang/include/clang/Driver/Distro.h @@ -30,9 +30,6 @@ public: // the first and last known member in the family, e.g. IsRedHat(). AlpineLinux, ArchLinux, - DebianLenny, - DebianSqueeze, - DebianWheezy, DebianJessie, DebianStretch, DebianBuster, @@ -42,16 +39,13 @@ public: DebianForky, DebianDuke, Exherbo, - RHEL5, - RHEL6, RHEL7, + RHEL8, + RHEL9, + RHEL10, Fedora, Gentoo, OpenSUSE, - UbuntuMaverick, - UbuntuNatty, - UbuntuOneiric, - UbuntuPrecise, UbuntuQuantal, UbuntuRaring, UbuntuSaucy, @@ -121,17 +115,17 @@ public: /// @{ bool IsRedhat() const { - return DistroVal == Fedora || (DistroVal >= RHEL5 && DistroVal <= RHEL7); + return DistroVal == Fedora || (DistroVal >= RHEL7 && DistroVal <= RHEL10); } bool IsOpenSUSE() const { return DistroVal == OpenSUSE; } bool IsDebian() const { - return DistroVal >= DebianLenny && DistroVal <= DebianDuke; + return DistroVal >= DebianJessie && DistroVal <= DebianDuke; } bool IsUbuntu() const { - return DistroVal >= UbuntuMaverick && DistroVal <= UbuntuResolute; + return DistroVal >= UbuntuQuantal && DistroVal <= UbuntuResolute; } bool IsAlpineLinux() const { return DistroVal == AlpineLinux; } diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h index 7613952..db4aec7 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h @@ -48,6 +48,9 @@ class FunctionSummariesTy { /// The number of times the function has been inlined. unsigned TimesInlined : 32; + /// Running time for syntax-based AST analysis in milliseconds. + std::optional<unsigned> SyntaxRunningTime = std::nullopt; + FunctionSummary() : TotalBasicBlocks(0), InlineChecked(0), MayInline(0), TimesInlined(0) {} @@ -69,6 +72,11 @@ public: return I; } + FunctionSummary const *findSummary(const Decl *D) const { + auto I = Map.find(D); + return I == Map.end() ? nullptr : &I->second; + } + void markMayInline(const Decl *D) { MapTy::iterator I = findOrInsertSummary(D); I->second.InlineChecked = 1; diff --git a/clang/lib/AST/ByteCode/Context.cpp b/clang/lib/AST/ByteCode/Context.cpp index 683e916..4f4b122 100644 --- a/clang/lib/AST/ByteCode/Context.cpp +++ b/clang/lib/AST/ByteCode/Context.cpp @@ -566,10 +566,15 @@ const Function *Context::getOrCreateFunction(const FunctionDecl *FuncDecl) { // Assign descriptors to all parameters. // Composite objects are lowered to pointers. - for (const ParmVarDecl *PD : FuncDecl->parameters()) { + const auto *FuncProto = FuncDecl->getType()->getAs<FunctionProtoType>(); + for (auto [ParamIndex, PD] : llvm::enumerate(FuncDecl->parameters())) { bool IsConst = PD->getType().isConstQualified(); bool IsVolatile = PD->getType().isVolatileQualified(); + if (!getASTContext().hasSameType(PD->getType(), + FuncProto->getParamType(ParamIndex))) + return nullptr; + OptPrimType T = classify(PD->getType()); PrimType PT = T.value_or(PT_Ptr); Descriptor *Desc = P->createDescriptor(PD, PT, nullptr, std::nullopt, diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 0cb4910..39b991c 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2899,6 +2899,35 @@ static bool interp__builtin_ia32_test_op( return true; } +static bool interp__builtin_ia32_movmsk_op(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + assert(Call->getNumArgs() == 1); + + const Pointer &Source = S.Stk.pop<Pointer>(); + + unsigned SourceLen = Source.getNumElems(); + QualType ElemQT = getElemType(Source); + OptPrimType ElemT = S.getContext().classify(ElemQT); + unsigned ResultLen = + S.getASTContext().getTypeSize(Call->getType()); // Always 32-bit integer. + APInt Result(ResultLen, 0); + + for (unsigned I = 0; I != SourceLen; ++I) { + APInt Elem; + if (ElemQT->isIntegerType()) { + INT_TYPE_SWITCH_NO_BOOL(*ElemT, { Elem = Source.elem<T>(I).toAPSInt(); }); + } else if (ElemQT->isRealFloatingType()) { + using T = PrimConv<PT_Float>::T; + Elem = Source.elem<T>(I).getAPFloat().bitcastToAPInt(); + } else { + return false; + } + Result.setBitVal(I, Elem.isNegative()); + } + pushInteger(S, Result, Call->getType()); + return true; +} + static bool interp__builtin_elementwise_triop( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)> @@ -2962,6 +2991,82 @@ static bool interp__builtin_elementwise_triop( return true; } +static bool interp__builtin_x86_extract_vector(InterpState &S, CodePtr OpPC, + const CallExpr *Call, + unsigned ID) { + assert(Call->getNumArgs() == 2); + + APSInt ImmAPS = popToAPSInt(S, Call->getArg(1)); + uint64_t Index = ImmAPS.getZExtValue(); + + const Pointer &Src = S.Stk.pop<Pointer>(); + if (!Src.getFieldDesc()->isPrimitiveArray()) + return false; + + const Pointer &Dst = S.Stk.peek<Pointer>(); + if (!Dst.getFieldDesc()->isPrimitiveArray()) + return false; + + unsigned SrcElems = Src.getNumElems(); + unsigned DstElems = Dst.getNumElems(); + + unsigned NumLanes = SrcElems / DstElems; + unsigned Lane = static_cast<unsigned>(Index % NumLanes); + unsigned ExtractPos = Lane * DstElems; + + PrimType ElemT = Src.getFieldDesc()->getPrimType(); + + TYPE_SWITCH(ElemT, { + for (unsigned I = 0; I != DstElems; ++I) { + Dst.elem<T>(I) = Src.elem<T>(ExtractPos + I); + } + }); + + Dst.initializeAllElements(); + return true; +} + +static bool interp__builtin_x86_extract_vector_masked(InterpState &S, + CodePtr OpPC, + const CallExpr *Call, + unsigned ID) { + assert(Call->getNumArgs() == 4); + + APSInt MaskAPS = popToAPSInt(S, Call->getArg(3)); + const Pointer &Merge = S.Stk.pop<Pointer>(); + APSInt ImmAPS = popToAPSInt(S, Call->getArg(1)); + const Pointer &Src = S.Stk.pop<Pointer>(); + + if (!Src.getFieldDesc()->isPrimitiveArray() || + !Merge.getFieldDesc()->isPrimitiveArray()) + return false; + + const Pointer &Dst = S.Stk.peek<Pointer>(); + if (!Dst.getFieldDesc()->isPrimitiveArray()) + return false; + + unsigned SrcElems = Src.getNumElems(); + unsigned DstElems = Dst.getNumElems(); + + unsigned NumLanes = SrcElems / DstElems; + unsigned Lane = static_cast<unsigned>(ImmAPS.getZExtValue() % NumLanes); + unsigned Base = Lane * DstElems; + + PrimType ElemT = Src.getFieldDesc()->getPrimType(); + + TYPE_SWITCH(ElemT, { + for (unsigned I = 0; I != DstElems; ++I) { + if (MaskAPS[I]) + Dst.elem<T>(I) = Src.elem<T>(Base + I); + else + Dst.elem<T>(I) = Merge.elem<T>(I); + } + }); + + Dst.initializeAllElements(); + return true; +} + static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID) { @@ -3003,6 +3108,45 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_ia32_phminposuw(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + assert(Call->getNumArgs() == 1); + + const Pointer &Source = S.Stk.pop<Pointer>(); + const Pointer &Dest = S.Stk.peek<Pointer>(); + + unsigned SourceLen = Source.getNumElems(); + QualType ElemQT = getElemType(Source); + OptPrimType ElemT = S.getContext().classify(ElemQT); + unsigned ElemBitWidth = S.getASTContext().getTypeSize(ElemQT); + + bool DestUnsigned = Call->getCallReturnType(S.getASTContext()) + ->castAs<VectorType>() + ->getElementType() + ->isUnsignedIntegerOrEnumerationType(); + + INT_TYPE_SWITCH_NO_BOOL(*ElemT, { + APSInt MinIndex(ElemBitWidth, DestUnsigned); + APSInt MinVal = Source.elem<T>(0).toAPSInt(); + + for (unsigned I = 1; I != SourceLen; ++I) { + APSInt Val = Source.elem<T>(I).toAPSInt(); + if (MinVal.ugt(Val)) { + MinVal = Val; + MinIndex = I; + } + } + + Dest.elem<T>(0) = static_cast<T>(MinVal); + Dest.elem<T>(1) = static_cast<T>(MinIndex); + for (unsigned I = 2; I != SourceLen; ++I) { + Dest.elem<T>(I) = static_cast<T>(APSInt(ElemBitWidth, DestUnsigned)); + } + }); + Dest.initializeAllElements(); + return true; +} + static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC, const CallExpr *Call, bool MaskZ) { assert(Call->getNumArgs() == 5); @@ -3620,6 +3764,25 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS); }); + case X86::BI__builtin_ia32_extract128i256: + case X86::BI__builtin_ia32_vextractf128_pd256: + case X86::BI__builtin_ia32_vextractf128_ps256: + case X86::BI__builtin_ia32_vextractf128_si256: + return interp__builtin_x86_extract_vector(S, OpPC, Call, BuiltinID); + + case X86::BI__builtin_ia32_extractf32x4_256_mask: + case X86::BI__builtin_ia32_extractf32x4_mask: + case X86::BI__builtin_ia32_extractf32x8_mask: + case X86::BI__builtin_ia32_extractf64x2_256_mask: + case X86::BI__builtin_ia32_extractf64x2_512_mask: + case X86::BI__builtin_ia32_extractf64x4_mask: + case X86::BI__builtin_ia32_extracti32x4_256_mask: + case X86::BI__builtin_ia32_extracti32x4_mask: + case X86::BI__builtin_ia32_extracti32x8_mask: + case X86::BI__builtin_ia32_extracti64x2_256_mask: + case X86::BI__builtin_ia32_extracti64x2_512_mask: + case X86::BI__builtin_ia32_extracti64x4_mask: + return interp__builtin_x86_extract_vector_masked(S, OpPC, Call, BuiltinID); case clang::X86::BI__builtin_ia32_pmulhrsw128: case clang::X86::BI__builtin_ia32_pmulhrsw256: @@ -3630,6 +3793,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, .extractBits(16, 1); }); + case clang::X86::BI__builtin_ia32_movmskps: + case clang::X86::BI__builtin_ia32_movmskpd: + case clang::X86::BI__builtin_ia32_pmovmskb128: + case clang::X86::BI__builtin_ia32_pmovmskb256: + case clang::X86::BI__builtin_ia32_movmskps256: + case clang::X86::BI__builtin_ia32_movmskpd256: { + return interp__builtin_ia32_movmsk_op(S, OpPC, Call); + } + case clang::X86::BI__builtin_ia32_pavgb128: case clang::X86::BI__builtin_ia32_pavgw128: case clang::X86::BI__builtin_ia32_pavgb256: @@ -4087,6 +4259,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; }); + case X86::BI__builtin_ia32_phminposuw128: + return interp__builtin_ia32_phminposuw(S, OpPC, Call); + case X86::BI__builtin_ia32_pternlogd128_mask: case X86::BI__builtin_ia32_pternlogd256_mask: case X86::BI__builtin_ia32_pternlogd512_mask: diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index f048076..8579e51 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3380,11 +3380,11 @@ bool FunctionDecl::isMSVCRTEntryPoint() const { return false; return llvm::StringSwitch<bool>(getName()) - .Cases("main", // an ANSI console app - "wmain", // a Unicode console App - "WinMain", // an ANSI GUI app - "wWinMain", // a Unicode GUI app - "DllMain", // a DLL + .Cases({"main", // an ANSI console app + "wmain", // a Unicode console App + "WinMain", // an ANSI GUI app + "wWinMain", // a Unicode GUI app + "DllMain"}, // a DLL true) .Default(false); } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index e308c17..00aaaab 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11811,6 +11811,73 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS); }); + case X86::BI__builtin_ia32_extract128i256: + case X86::BI__builtin_ia32_vextractf128_pd256: + case X86::BI__builtin_ia32_vextractf128_ps256: + case X86::BI__builtin_ia32_vextractf128_si256: { + APValue SourceVec, SourceImm; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) || + !EvaluateAsRValue(Info, E->getArg(1), SourceImm)) + return false; + + if (!SourceVec.isVector()) + return false; + + const auto *RetVT = E->getType()->castAs<VectorType>(); + unsigned RetLen = RetVT->getNumElements(); + unsigned Idx = SourceImm.getInt().getZExtValue() & 1; + + SmallVector<APValue, 32> ResultElements; + ResultElements.reserve(RetLen); + + for (unsigned I = 0; I < RetLen; I++) + ResultElements.push_back(SourceVec.getVectorElt(Idx * RetLen + I)); + + return Success(APValue(ResultElements.data(), RetLen), E); + } + + case X86::BI__builtin_ia32_extracti32x4_256_mask: + case X86::BI__builtin_ia32_extractf32x4_256_mask: + case X86::BI__builtin_ia32_extracti32x4_mask: + case X86::BI__builtin_ia32_extractf32x4_mask: + case X86::BI__builtin_ia32_extracti32x8_mask: + case X86::BI__builtin_ia32_extractf32x8_mask: + case X86::BI__builtin_ia32_extracti64x2_256_mask: + case X86::BI__builtin_ia32_extractf64x2_256_mask: + case X86::BI__builtin_ia32_extracti64x2_512_mask: + case X86::BI__builtin_ia32_extractf64x2_512_mask: + case X86::BI__builtin_ia32_extracti64x4_mask: + case X86::BI__builtin_ia32_extractf64x4_mask: { + APValue SourceVec, MergeVec; + APSInt Imm, MaskImm; + + if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) || + !EvaluateInteger(E->getArg(1), Imm, Info) || + !EvaluateAsRValue(Info, E->getArg(2), MergeVec) || + !EvaluateInteger(E->getArg(3), MaskImm, Info)) + return false; + + const auto *RetVT = E->getType()->castAs<VectorType>(); + unsigned RetLen = RetVT->getNumElements(); + + if (!SourceVec.isVector() || !MergeVec.isVector()) + return false; + unsigned SrcLen = SourceVec.getVectorLength(); + unsigned Lanes = SrcLen / RetLen; + unsigned Lane = static_cast<unsigned>(Imm.getZExtValue() % Lanes); + unsigned Base = Lane * RetLen; + + SmallVector<APValue, 32> ResultElements; + ResultElements.reserve(RetLen); + for (unsigned I = 0; I < RetLen; ++I) { + if (MaskImm[I]) + ResultElements.push_back(SourceVec.getVectorElt(Base + I)); + else + ResultElements.push_back(MergeVec.getVectorElt(I)); + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case clang::X86::BI__builtin_ia32_pavgb128: case clang::X86::BI__builtin_ia32_pavgw128: case clang::X86::BI__builtin_ia32_pavgb256: @@ -12353,6 +12420,40 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(R, E); } + case X86::BI__builtin_ia32_phminposuw128: { + APValue Source; + if (!Evaluate(Source, Info, E->getArg(0))) + return false; + unsigned SourceLen = Source.getVectorLength(); + const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>(); + QualType ElemQT = VT->getElementType(); + unsigned ElemBitWidth = Info.Ctx.getTypeSize(ElemQT); + + APInt MinIndex(ElemBitWidth, 0); + APInt MinVal = Source.getVectorElt(0).getInt(); + for (unsigned I = 1; I != SourceLen; ++I) { + APInt Val = Source.getVectorElt(I).getInt(); + if (MinVal.ugt(Val)) { + MinVal = Val; + MinIndex = I; + } + } + + bool ResultUnsigned = E->getCallReturnType(Info.Ctx) + ->castAs<VectorType>() + ->getElementType() + ->isUnsignedIntegerOrEnumerationType(); + + SmallVector<APValue, 8> Result; + Result.reserve(SourceLen); + Result.emplace_back(APSInt(MinVal, ResultUnsigned)); + Result.emplace_back(APSInt(MinIndex, ResultUnsigned)); + for (unsigned I = 0; I != SourceLen - 2; ++I) { + Result.emplace_back(APSInt(APInt(ElemBitWidth, 0), ResultUnsigned)); + } + return Success(APValue(Result.data(), Result.size()), E); + } + case X86::BI__builtin_ia32_pternlogd128_mask: case X86::BI__builtin_ia32_pternlogd256_mask: case X86::BI__builtin_ia32_pternlogd512_mask: @@ -15268,6 +15369,36 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return Success(CarryOut, E); } + case clang::X86::BI__builtin_ia32_movmskps: + case clang::X86::BI__builtin_ia32_movmskpd: + case clang::X86::BI__builtin_ia32_pmovmskb128: + case clang::X86::BI__builtin_ia32_pmovmskb256: + case clang::X86::BI__builtin_ia32_movmskps256: + case clang::X86::BI__builtin_ia32_movmskpd256: { + APValue Source; + if (!Evaluate(Source, Info, E->getArg(0))) + return false; + unsigned SourceLen = Source.getVectorLength(); + const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>(); + QualType ElemQT = VT->getElementType(); + unsigned ResultLen = Info.Ctx.getTypeSize( + E->getCallReturnType(Info.Ctx)); // Always 32-bit integer. + APInt Result(ResultLen, 0); + + for (unsigned I = 0; I != SourceLen; ++I) { + APInt Elem; + if (ElemQT->isIntegerType()) { + Elem = Source.getVectorElt(I).getInt(); + } else if (ElemQT->isRealFloatingType()) { + Elem = Source.getVectorElt(I).getFloat().bitcastToAPInt(); + } else { + return false; + } + Result.setBitVal(I, Elem.isNegative()); + } + return Success(Result, E); + } + case clang::X86::BI__builtin_ia32_bextr_u32: case clang::X86::BI__builtin_ia32_bextr_u64: case clang::X86::BI__builtin_ia32_bextri_u32: diff --git a/clang/lib/Basic/Targets/Mips.cpp b/clang/lib/Basic/Targets/Mips.cpp index de6ccff..a999d14 100644 --- a/clang/lib/Basic/Targets/Mips.cpp +++ b/clang/lib/Basic/Targets/Mips.cpp @@ -68,11 +68,11 @@ void MipsTargetInfo::fillValidCPUList( unsigned MipsTargetInfo::getISARev() const { return llvm::StringSwitch<unsigned>(getCPU()) - .Cases("mips32", "mips64", 1) - .Cases("mips32r2", "mips64r2", "octeon", "octeon+", 2) - .Cases("mips32r3", "mips64r3", 3) - .Cases("mips32r5", "mips64r5", "p5600", 5) - .Cases("mips32r6", "mips64r6", "i6400", "i6500", 6) + .Cases({"mips32", "mips64"}, 1) + .Cases({"mips32r2", "mips64r2", "octeon", "octeon+"}, 2) + .Cases({"mips32r3", "mips64r3"}, 3) + .Cases({"mips32r5", "mips64r5", "p5600"}, 5) + .Cases({"mips32r6", "mips64r6", "i6400", "i6500"}, 6) .Default(0); } diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index 4897c29..0d364e1 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -2394,6 +2394,180 @@ LValue CIRGenFunction::emitPredefinedLValue(const PredefinedExpr *e) { return emitStringLiteralLValue(sl, gvName); } +LValue CIRGenFunction::emitOpaqueValueLValue(const OpaqueValueExpr *e) { + assert(OpaqueValueMappingData::shouldBindAsLValue(e)); + return getOrCreateOpaqueLValueMapping(e); +} + +namespace { +// Handle the case where the condition is a constant evaluatable simple integer, +// which means we don't have to separately handle the true/false blocks. +std::optional<LValue> handleConditionalOperatorLValueSimpleCase( + CIRGenFunction &cgf, const AbstractConditionalOperator *e) { + const Expr *condExpr = e->getCond(); + llvm::APSInt condExprVal; + if (!cgf.constantFoldsToSimpleInteger(condExpr, condExprVal)) + return std::nullopt; + + const Expr *live = e->getTrueExpr(), *dead = e->getFalseExpr(); + if (!condExprVal.getBoolValue()) + std::swap(live, dead); + + if (cgf.containsLabel(dead)) + return std::nullopt; + + // If the true case is live, we need to track its region. + assert(!cir::MissingFeatures::incrementProfileCounter()); + assert(!cir::MissingFeatures::pgoUse()); + // If a throw expression we emit it and return an undefined lvalue + // because it can't be used. + if (auto *throwExpr = dyn_cast<CXXThrowExpr>(live->IgnoreParens())) { + cgf.emitCXXThrowExpr(throwExpr); + // Return an undefined lvalue - the throw terminates execution + // so this value will never actually be used + mlir::Type elemTy = cgf.convertType(dead->getType()); + mlir::Value undefPtr = + cgf.getBuilder().getNullPtr(cgf.getBuilder().getPointerTo(elemTy), + cgf.getLoc(throwExpr->getSourceRange())); + return cgf.makeAddrLValue(Address(undefPtr, elemTy, CharUnits::One()), + dead->getType()); + } + return cgf.emitLValue(live); +} + +/// Emit the operand of a glvalue conditional operator. This is either a glvalue +/// or a (possibly-parenthesized) throw-expression. If this is a throw, no +/// LValue is returned and the current block has been terminated. +static std::optional<LValue> emitLValueOrThrowExpression(CIRGenFunction &cgf, + const Expr *operand) { + if (auto *throwExpr = dyn_cast<CXXThrowExpr>(operand->IgnoreParens())) { + cgf.emitCXXThrowExpr(throwExpr); + return std::nullopt; + } + + return cgf.emitLValue(operand); +} +} // namespace + +// Create and generate the 3 blocks for a conditional operator. +// Leaves the 'current block' in the continuation basic block. +template <typename FuncTy> +CIRGenFunction::ConditionalInfo +CIRGenFunction::emitConditionalBlocks(const AbstractConditionalOperator *e, + const FuncTy &branchGenFunc) { + ConditionalInfo info; + ConditionalEvaluation eval(*this); + mlir::Location loc = getLoc(e->getSourceRange()); + CIRGenBuilderTy &builder = getBuilder(); + + mlir::Value condV = emitOpOnBoolExpr(loc, e->getCond()); + SmallVector<mlir::OpBuilder::InsertPoint, 2> insertPoints{}; + mlir::Type yieldTy{}; + + auto emitBranch = [&](mlir::OpBuilder &b, mlir::Location loc, + const Expr *expr, std::optional<LValue> &resultLV) { + CIRGenFunction::LexicalScope lexScope{*this, loc, b.getInsertionBlock()}; + curLexScope->setAsTernary(); + + assert(!cir::MissingFeatures::incrementProfileCounter()); + eval.beginEvaluation(); + resultLV = branchGenFunc(*this, expr); + mlir::Value resultPtr = resultLV ? resultLV->getPointer() : mlir::Value(); + eval.endEvaluation(); + + if (resultPtr) { + yieldTy = resultPtr.getType(); + cir::YieldOp::create(b, loc, resultPtr); + } else { + // If LHS or RHS is a void expression we need + // to patch arms as to properly match yield types. + // If the current block's terminator is an UnreachableOp (from a throw), + // we don't need a yield + if (builder.getInsertionBlock()->mightHaveTerminator()) { + mlir::Operation *terminator = + builder.getInsertionBlock()->getTerminator(); + if (isa_and_nonnull<cir::UnreachableOp>(terminator)) + insertPoints.push_back(b.saveInsertionPoint()); + } + } + }; + + info.result = cir::TernaryOp::create( + builder, loc, condV, + /*trueBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + emitBranch(b, loc, e->getTrueExpr(), info.lhs); + }, + /*falseBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + emitBranch(b, loc, e->getFalseExpr(), info.rhs); + }) + .getResult(); + + // If both arms are void, so be it. + if (!yieldTy) + yieldTy = VoidTy; + + // Insert required yields. + for (mlir::OpBuilder::InsertPoint &toInsert : insertPoints) { + mlir::OpBuilder::InsertionGuard guard(builder); + builder.restoreInsertionPoint(toInsert); + + // Block does not return: build empty yield. + if (!yieldTy) { + cir::YieldOp::create(builder, loc); + } else { // Block returns: set null yield value. + mlir::Value op0 = builder.getNullValue(yieldTy, loc); + cir::YieldOp::create(builder, loc, op0); + } + } + + return info; +} + +LValue CIRGenFunction::emitConditionalOperatorLValue( + const AbstractConditionalOperator *expr) { + if (!expr->isGLValue()) { + // ?: here should be an aggregate. + assert(hasAggregateEvaluationKind(expr->getType()) && + "Unexpected conditional operator!"); + return emitAggExprToLValue(expr); + } + + OpaqueValueMapping binding(*this, expr); + if (std::optional<LValue> res = + handleConditionalOperatorLValueSimpleCase(*this, expr)) + return *res; + + ConditionalInfo info = + emitConditionalBlocks(expr, [](CIRGenFunction &cgf, const Expr *e) { + return emitLValueOrThrowExpression(cgf, e); + }); + + if ((info.lhs && !info.lhs->isSimple()) || + (info.rhs && !info.rhs->isSimple())) { + cgm.errorNYI(expr->getSourceRange(), + "unsupported conditional operator with non-simple lvalue"); + return LValue(); + } + + if (info.lhs && info.rhs) { + Address lhsAddr = info.lhs->getAddress(); + Address rhsAddr = info.rhs->getAddress(); + Address result(info.result, lhsAddr.getElementType(), + std::min(lhsAddr.getAlignment(), rhsAddr.getAlignment())); + AlignmentSource alignSource = + std::max(info.lhs->getBaseInfo().getAlignmentSource(), + info.rhs->getBaseInfo().getAlignmentSource()); + assert(!cir::MissingFeatures::opTBAA()); + return makeAddrLValue(result, expr->getType(), LValueBaseInfo(alignSource)); + } + + assert((info.lhs || info.rhs) && + "both operands of glvalue conditional are throw-expressions?"); + return info.lhs ? *info.lhs : *info.rhs; +} + /// An LValue is a candidate for having its loads and stores be made atomic if /// we are operating under /volatile:ms *and* the LValue itself is volatile and /// performing such an operation can be performed without a libcall. diff --git a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp index 901b937..ddee000 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp @@ -170,19 +170,10 @@ public: void VisitConstantExpr(ConstantExpr *e) { cgf.cgm.errorNYI(e->getSourceRange(), "AggExprEmitter: VisitConstantExpr"); } - void VisitMemberExpr(MemberExpr *e) { - cgf.cgm.errorNYI(e->getSourceRange(), "AggExprEmitter: VisitMemberExpr"); - } - void VisitUnaryDeref(UnaryOperator *e) { - cgf.cgm.errorNYI(e->getSourceRange(), "AggExprEmitter: VisitUnaryDeref"); - } - void VisitStringLiteral(StringLiteral *e) { - cgf.cgm.errorNYI(e->getSourceRange(), "AggExprEmitter: VisitStringLiteral"); - } - void VisitCompoundLiteralExpr(CompoundLiteralExpr *e) { - cgf.cgm.errorNYI(e->getSourceRange(), - "AggExprEmitter: VisitCompoundLiteralExpr"); - } + void VisitMemberExpr(MemberExpr *e) { emitAggLoadOfLValue(e); } + void VisitUnaryDeref(UnaryOperator *e) { emitAggLoadOfLValue(e); } + void VisitStringLiteral(StringLiteral *e) { emitAggLoadOfLValue(e); } + void VisitCompoundLiteralExpr(CompoundLiteralExpr *e); void VisitPredefinedExpr(const PredefinedExpr *e) { cgf.cgm.errorNYI(e->getSourceRange(), "AggExprEmitter: VisitPredefinedExpr"); @@ -325,6 +316,31 @@ void AggExprEmitter::emitAggLoadOfLValue(const Expr *e) { emitFinalDestCopy(e->getType(), lv); } +void AggExprEmitter::VisitCompoundLiteralExpr(CompoundLiteralExpr *e) { + if (dest.isPotentiallyAliased() && e->getType().isPODType(cgf.getContext())) { + // For a POD type, just emit a load of the lvalue + a copy, because our + // compound literal might alias the destination. + emitAggLoadOfLValue(e); + return; + } + + AggValueSlot slot = ensureSlot(cgf.getLoc(e->getSourceRange()), e->getType()); + + // Block-scope compound literals are destroyed at the end of the enclosing + // scope in C. + bool destruct = + !cgf.getLangOpts().CPlusPlus && !slot.isExternallyDestructed(); + if (destruct) + slot.setExternallyDestructed(); + + cgf.emitAggExpr(e->getInitializer(), slot); + + if (destruct) + if ([[maybe_unused]] QualType::DestructionKind dtorKind = + e->getType().isDestructedType()) + cgf.cgm.errorNYI(e->getSourceRange(), "compound literal with destructor"); +} + void AggExprEmitter::emitArrayInit(Address destPtr, cir::ArrayType arrayTy, QualType arrayQTy, Expr *e, ArrayRef<Expr *> args, Expr *arrayFiller) { diff --git a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp index fcde487..d8f4943 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp @@ -196,9 +196,8 @@ public: return Visit(e->getSubExpr()); } mlir::Value VisitCXXDefaultArgExpr(CXXDefaultArgExpr *dae) { - cgf.cgm.errorNYI(dae->getExprLoc(), - "ComplexExprEmitter VisitCXXDefaultArgExpr"); - return {}; + CIRGenFunction::CXXDefaultArgExprScope scope(cgf, dae); + return Visit(dae->getExpr()); } mlir::Value VisitCXXDefaultInitExpr(CXXDefaultInitExpr *die) { CIRGenFunction::CXXDefaultInitExprScope scope(cgf, die); diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp index ba36cbe..25a46df 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -822,6 +822,10 @@ LValue CIRGenFunction::emitLValue(const Expr *e) { std::string("l-value not implemented for '") + e->getStmtClassName() + "'"); return LValue(); + case Expr::ConditionalOperatorClass: + return emitConditionalOperatorLValue(cast<ConditionalOperator>(e)); + case Expr::BinaryConditionalOperatorClass: + return emitConditionalOperatorLValue(cast<BinaryConditionalOperator>(e)); case Expr::ArraySubscriptExprClass: return emitArraySubscriptExpr(cast<ArraySubscriptExpr>(e)); case Expr::UnaryOperatorClass: @@ -866,6 +870,8 @@ LValue CIRGenFunction::emitLValue(const Expr *e) { return emitCastLValue(cast<CastExpr>(e)); case Expr::MaterializeTemporaryExprClass: return emitMaterializeTemporaryExpr(cast<MaterializeTemporaryExpr>(e)); + case Expr::OpaqueValueExprClass: + return emitOpaqueValueLValue(cast<OpaqueValueExpr>(e)); case Expr::ChooseExprClass: return emitLValue(cast<ChooseExpr>(e)->getChosenSubExpr()); } diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 3c36f5c..b8492a8 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -733,6 +733,11 @@ public: SourceLocExprScopeGuard sourceLocScope; }; + struct CXXDefaultArgExprScope : SourceLocExprScopeGuard { + CXXDefaultArgExprScope(CIRGenFunction &cfg, const CXXDefaultArgExpr *e) + : SourceLocExprScopeGuard(e, cfg.curSourceLocExprScope) {} + }; + LValue makeNaturalAlignPointeeAddrLValue(mlir::Value v, clang::QualType t); LValue makeNaturalAlignAddrLValue(mlir::Value val, QualType ty); @@ -1518,6 +1523,10 @@ public: LValue emitMemberExpr(const MemberExpr *e); + LValue emitOpaqueValueLValue(const OpaqueValueExpr *e); + + LValue emitConditionalOperatorLValue(const AbstractConditionalOperator *expr); + /// Given an expression with a pointer type, emit the value and compute our /// best estimate of the alignment of the pointee. /// diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp index f638d39..be063033 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp @@ -590,15 +590,18 @@ void OpenACCRecipeBuilderBase::createReductionRecipeCombiner( } else { // else we have to handle each individual field after after a // get-element. + const CIRGenRecordLayout &layout = + cgf.cgm.getTypes().getCIRGenRecordLayout(rd); for (const auto &[field, combiner] : llvm::zip_equal(rd->fields(), combinerRecipes)) { mlir::Type fieldType = cgf.convertType(field->getType()); auto fieldPtr = cir::PointerType::get(fieldType); + unsigned fieldIndex = layout.getCIRFieldNo(field); mlir::Value lhsField = builder.createGetMember( - loc, fieldPtr, lhsArg, field->getName(), field->getFieldIndex()); + loc, fieldPtr, lhsArg, field->getName(), fieldIndex); mlir::Value rhsField = builder.createGetMember( - loc, fieldPtr, rhsArg, field->getName(), field->getFieldIndex()); + loc, fieldPtr, rhsArg, field->getName(), fieldIndex); emitSingleCombiner(lhsField, rhsField, combiner); } diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp index 5ba64dd..ad8c4d0 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -475,8 +475,8 @@ mlir::LogicalResult CIRGenFunction::emitReturnStmt(const ReturnStmt &s) { } break; case cir::TEK_Complex: - getCIRGenModule().errorNYI(s.getSourceRange(), - "complex function return type"); + emitComplexExprIntoLValue(rv, makeAddrLValue(returnValue, rv->getType()), + /*isInit=*/true); break; case cir::TEK_Aggregate: assert(!cir::MissingFeatures::aggValueSlotGC()); diff --git a/clang/lib/CIR/CodeGen/CIRGenValue.h b/clang/lib/CIR/CodeGen/CIRGenValue.h index c05142e..f83a952 100644 --- a/clang/lib/CIR/CodeGen/CIRGenValue.h +++ b/clang/lib/CIR/CodeGen/CIRGenValue.h @@ -390,6 +390,8 @@ public: IsZeroed_t isZeroed() const { return IsZeroed_t(zeroedFlag); } + IsAliased_t isPotentiallyAliased() const { return IsAliased_t(aliasedFlag); } + RValue asRValue() const { if (isIgnored()) return RValue::getIgnored(); diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index b4c3704..ed606b7 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -1978,13 +1978,19 @@ void cir::TernaryOp::build( result.addOperands(cond); OpBuilder::InsertionGuard guard(builder); Region *trueRegion = result.addRegion(); - Block *block = builder.createBlock(trueRegion); + builder.createBlock(trueRegion); trueBuilder(builder, result.location); Region *falseRegion = result.addRegion(); builder.createBlock(falseRegion); falseBuilder(builder, result.location); - auto yield = dyn_cast<YieldOp>(block->getTerminator()); + // Get result type from whichever branch has a yield (the other may have + // unreachable from a throw expression) + auto yield = + dyn_cast_or_null<cir::YieldOp>(trueRegion->back().getTerminator()); + if (!yield) + yield = dyn_cast_or_null<cir::YieldOp>(falseRegion->back().getTerminator()); + assert((yield && yield.getNumOperands() <= 1) && "expected zero or one result type"); if (yield.getNumOperands() == 1) diff --git a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp index 26e5c05..8589a2e 100644 --- a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp +++ b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp @@ -505,10 +505,19 @@ public: Block *trueBlock = &trueRegion.front(); mlir::Operation *trueTerminator = trueRegion.back().getTerminator(); rewriter.setInsertionPointToEnd(&trueRegion.back()); - auto trueYieldOp = dyn_cast<cir::YieldOp>(trueTerminator); - rewriter.replaceOpWithNewOp<cir::BrOp>(trueYieldOp, trueYieldOp.getArgs(), - continueBlock); + // Handle both yield and unreachable terminators (throw expressions) + if (auto trueYieldOp = dyn_cast<cir::YieldOp>(trueTerminator)) { + rewriter.replaceOpWithNewOp<cir::BrOp>(trueYieldOp, trueYieldOp.getArgs(), + continueBlock); + } else if (isa<cir::UnreachableOp>(trueTerminator)) { + // Terminator is unreachable (e.g., from throw), just keep it + } else { + trueTerminator->emitError("unexpected terminator in ternary true region, " + "expected yield or unreachable, got: ") + << trueTerminator->getName(); + return mlir::failure(); + } rewriter.inlineRegionBefore(trueRegion, continueBlock); Block *falseBlock = continueBlock; @@ -517,9 +526,19 @@ public: falseBlock = &falseRegion.front(); mlir::Operation *falseTerminator = falseRegion.back().getTerminator(); rewriter.setInsertionPointToEnd(&falseRegion.back()); - auto falseYieldOp = dyn_cast<cir::YieldOp>(falseTerminator); - rewriter.replaceOpWithNewOp<cir::BrOp>(falseYieldOp, falseYieldOp.getArgs(), - continueBlock); + + // Handle both yield and unreachable terminators (throw expressions) + if (auto falseYieldOp = dyn_cast<cir::YieldOp>(falseTerminator)) { + rewriter.replaceOpWithNewOp<cir::BrOp>( + falseYieldOp, falseYieldOp.getArgs(), continueBlock); + } else if (isa<cir::UnreachableOp>(falseTerminator)) { + // Terminator is unreachable (e.g., from throw), just keep it + } else { + falseTerminator->emitError("unexpected terminator in ternary false " + "region, expected yield or unreachable, got: ") + << falseTerminator->getName(); + return mlir::failure(); + } rewriter.inlineRegionBefore(falseRegion, continueBlock); rewriter.setInsertionPointToEnd(condBlock); diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp index 5bf1816..a012581 100644 --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -86,7 +86,7 @@ namespace { llvm::Value *StoragePtr = CGF.Builder.CreateConstGEP1_64( CGF.Int8Ty, BitFieldPtr, OffsetInChars.getQuantity()); StoragePtr = CGF.Builder.CreateAddrSpaceCast( - StoragePtr, CGF.UnqualPtrTy, "atomic_bitfield_base"); + StoragePtr, CGF.DefaultPtrTy, "atomic_bitfield_base"); BFI = OrigBFI; BFI.Offset = Offset; BFI.StorageSize = AtomicSizeInBits; diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index 597127ab..20a595e 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -1207,7 +1207,7 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, } else { // Bitcast the block literal to a generic block literal. BlockPtr = - Builder.CreatePointerCast(BlockPtr, UnqualPtrTy, "block.literal"); + Builder.CreatePointerCast(BlockPtr, DefaultPtrTy, "block.literal"); // Get pointer to the block invoke function FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 3); diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 92dba32..fd14cd6 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1673,7 +1673,7 @@ static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF, CGF.getLLVMContext(), CGF.getContext().getTypeSize(E->getArg(1)->getType())); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false); + llvm::FunctionType::get(CGF.Int8Ty, {CGF.DefaultPtrTy, IntType}, false); llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index cb16fe1..b463f88 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -230,7 +230,7 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) IntTy = CGM.IntTy; SizeTy = CGM.SizeTy; VoidTy = CGM.VoidTy; - PtrTy = CGM.UnqualPtrTy; + PtrTy = CGM.DefaultPtrTy; if (CGM.getLangOpts().OffloadViaLLVM) Prefix = "llvm"; diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 8439ec7..fd73314 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1756,7 +1756,7 @@ LValue CodeGenFunction::EmitUnsupportedLValue(const Expr *E, const char *Name) { ErrorUnsupported(E, Name); llvm::Type *ElTy = ConvertType(E->getType()); - llvm::Type *Ty = UnqualPtrTy; + llvm::Type *Ty = DefaultPtrTy; return MakeAddrLValue( Address(llvm::UndefValue::get(Ty), ElTy, CharUnits::One()), E->getType()); } @@ -4270,7 +4270,7 @@ void CodeGenFunction::EmitCfiCheckFail() { llvm::StructType::get(Int8Ty, SourceLocationTy, VoidPtrTy); llvm::Value *V = Builder.CreateConstGEP2_32( - CfiCheckFailDataTy, Builder.CreatePointerCast(Data, UnqualPtrTy), 0, 0); + CfiCheckFailDataTy, Builder.CreatePointerCast(Data, DefaultPtrTy), 0, 0); Address CheckKindAddr(V, Int8Ty, getIntAlign()); llvm::Value *CheckKind = Builder.CreateLoad(CheckKindAddr); @@ -5711,7 +5711,7 @@ std::optional<LValue> HandleConditionalOperatorLValueSimpleCase( if (auto *ThrowExpr = dyn_cast<CXXThrowExpr>(Live->IgnoreParens())) { CGF.EmitCXXThrowExpr(ThrowExpr); llvm::Type *ElemTy = CGF.ConvertType(Dead->getType()); - llvm::Type *Ty = CGF.UnqualPtrTy; + llvm::Type *Ty = CGF.DefaultPtrTy; return CGF.MakeAddrLValue( Address(llvm::UndefValue::get(Ty), ElemTy, CharUnits::One()), Dead->getType()); diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index c571821a..cb5bb40 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -338,7 +338,7 @@ public: /// GcReadWeakFn -- LLVM objc_read_weak (id *src) function. llvm::FunctionCallee getGcReadWeakFn() { // id objc_read_weak (id *) - llvm::Type *args[] = {CGM.UnqualPtrTy}; + llvm::Type *args[] = {CGM.DefaultPtrTy}; llvm::FunctionType *FTy = llvm::FunctionType::get(ObjectPtrTy, args, false); return CGM.CreateRuntimeFunction(FTy, "objc_read_weak"); } @@ -346,7 +346,7 @@ public: /// GcAssignWeakFn -- LLVM objc_assign_weak function. llvm::FunctionCallee getGcAssignWeakFn() { // id objc_assign_weak (id, id *) - llvm::Type *args[] = {ObjectPtrTy, CGM.UnqualPtrTy}; + llvm::Type *args[] = {ObjectPtrTy, CGM.DefaultPtrTy}; llvm::FunctionType *FTy = llvm::FunctionType::get(ObjectPtrTy, args, false); return CGM.CreateRuntimeFunction(FTy, "objc_assign_weak"); } @@ -354,7 +354,7 @@ public: /// GcAssignGlobalFn -- LLVM objc_assign_global function. llvm::FunctionCallee getGcAssignGlobalFn() { // id objc_assign_global(id, id *) - llvm::Type *args[] = {ObjectPtrTy, CGM.UnqualPtrTy}; + llvm::Type *args[] = {ObjectPtrTy, CGM.DefaultPtrTy}; llvm::FunctionType *FTy = llvm::FunctionType::get(ObjectPtrTy, args, false); return CGM.CreateRuntimeFunction(FTy, "objc_assign_global"); } @@ -362,7 +362,7 @@ public: /// GcAssignThreadLocalFn -- LLVM objc_assign_threadlocal function. llvm::FunctionCallee getGcAssignThreadLocalFn() { // id objc_assign_threadlocal(id src, id * dest) - llvm::Type *args[] = {ObjectPtrTy, CGM.UnqualPtrTy}; + llvm::Type *args[] = {ObjectPtrTy, CGM.DefaultPtrTy}; llvm::FunctionType *FTy = llvm::FunctionType::get(ObjectPtrTy, args, false); return CGM.CreateRuntimeFunction(FTy, "objc_assign_threadlocal"); } @@ -370,7 +370,7 @@ public: /// GcAssignIvarFn -- LLVM objc_assign_ivar function. llvm::FunctionCallee getGcAssignIvarFn() { // id objc_assign_ivar(id, id *, ptrdiff_t) - llvm::Type *args[] = {ObjectPtrTy, CGM.UnqualPtrTy, CGM.PtrDiffTy}; + llvm::Type *args[] = {ObjectPtrTy, CGM.DefaultPtrTy, CGM.PtrDiffTy}; llvm::FunctionType *FTy = llvm::FunctionType::get(ObjectPtrTy, args, false); return CGM.CreateRuntimeFunction(FTy, "objc_assign_ivar"); } @@ -386,7 +386,7 @@ public: /// GcAssignStrongCastFn -- LLVM objc_assign_strongCast function. llvm::FunctionCallee getGcAssignStrongCastFn() { // id objc_assign_strongCast(id, id *) - llvm::Type *args[] = {ObjectPtrTy, CGM.UnqualPtrTy}; + llvm::Type *args[] = {ObjectPtrTy, CGM.DefaultPtrTy}; llvm::FunctionType *FTy = llvm::FunctionType::get(ObjectPtrTy, args, false); return CGM.CreateRuntimeFunction(FTy, "objc_assign_strongCast"); } @@ -517,7 +517,7 @@ public: /// ExceptionTryEnterFn - LLVM objc_exception_try_enter function. llvm::FunctionCallee getExceptionTryEnterFn() { - llvm::Type *params[] = {CGM.UnqualPtrTy}; + llvm::Type *params[] = {CGM.DefaultPtrTy}; return CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGM.VoidTy, params, false), "objc_exception_try_enter"); @@ -525,7 +525,7 @@ public: /// ExceptionTryExitFn - LLVM objc_exception_try_exit function. llvm::FunctionCallee getExceptionTryExitFn() { - llvm::Type *params[] = {CGM.UnqualPtrTy}; + llvm::Type *params[] = {CGM.DefaultPtrTy}; return CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGM.VoidTy, params, false), "objc_exception_try_exit"); @@ -533,7 +533,7 @@ public: /// ExceptionExtractFn - LLVM objc_exception_extract function. llvm::FunctionCallee getExceptionExtractFn() { - llvm::Type *params[] = {CGM.UnqualPtrTy}; + llvm::Type *params[] = {CGM.DefaultPtrTy}; return CGM.CreateRuntimeFunction( llvm::FunctionType::get(ObjectPtrTy, params, false), "objc_exception_extract"); @@ -550,7 +550,7 @@ public: /// SetJmpFn - LLVM _setjmp function. llvm::FunctionCallee getSetJmpFn() { // This is specifically the prototype for x86. - llvm::Type *params[] = {CGM.UnqualPtrTy}; + llvm::Type *params[] = {CGM.DefaultPtrTy}; return CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGM.Int32Ty, params, false), "_setjmp", llvm::AttributeList::get(CGM.getLLVMContext(), @@ -1927,7 +1927,7 @@ CGObjCCommonMac::GenerateConstantNSString(const StringLiteral *Literal) { // If we don't already have it, construct the type for a constant NSString. if (!NSConstantStringType) { NSConstantStringType = - llvm::StructType::create({CGM.UnqualPtrTy, CGM.Int8PtrTy, CGM.IntTy}, + llvm::StructType::create({CGM.DefaultPtrTy, CGM.Int8PtrTy, CGM.IntTy}, "struct.__builtin_NSString"); } @@ -5959,7 +5959,7 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper( Int8PtrTy, PropertyListPtrTy); // ImpnfABITy - LLVM for id (*)(id, SEL, ...) - ImpnfABITy = CGM.UnqualPtrTy; + ImpnfABITy = CGM.DefaultPtrTy; // struct _class_t { // struct _class_t *isa; @@ -6380,7 +6380,7 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { CGM.getModule(), ObjCTypes.ImpnfABITy, false, llvm::GlobalValue::ExternalLinkage, nullptr, "_objc_empty_vtable"); else - ObjCEmptyVtableVar = llvm::ConstantPointerNull::get(CGM.UnqualPtrTy); + ObjCEmptyVtableVar = llvm::ConstantPointerNull::get(CGM.DefaultPtrTy); } // FIXME: Is this correct (that meta class size is never computed)? diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 1ff2be7..85b2404 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1714,12 +1714,12 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( // Copying constructor for the threadprivate variable. // Must be NULL - reserved by runtime, but currently it requires that this // parameter is always NULL. Otherwise it fires assertion. - CopyCtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy); + CopyCtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy); if (Ctor == nullptr) { - Ctor = llvm::Constant::getNullValue(CGM.UnqualPtrTy); + Ctor = llvm::Constant::getNullValue(CGM.DefaultPtrTy); } if (Dtor == nullptr) { - Dtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy); + Dtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy); } if (!CGF) { auto *InitFunctionTy = diff --git a/clang/lib/CodeGen/CGPointerAuth.cpp b/clang/lib/CodeGen/CGPointerAuth.cpp index 375f87a..dbb7bc9 100644 --- a/clang/lib/CodeGen/CGPointerAuth.cpp +++ b/clang/lib/CodeGen/CGPointerAuth.cpp @@ -426,10 +426,10 @@ CodeGenModule::getConstantSignedPointer(llvm::Constant *Pointer, unsigned Key, llvm::ConstantInt *OtherDiscriminator) { llvm::Constant *AddressDiscriminator; if (StorageAddress) { - assert(StorageAddress->getType() == UnqualPtrTy); + assert(StorageAddress->getType() == DefaultPtrTy); AddressDiscriminator = StorageAddress; } else { - AddressDiscriminator = llvm::Constant::getNullValue(UnqualPtrTy); + AddressDiscriminator = llvm::Constant::getNullValue(DefaultPtrTy); } llvm::ConstantInt *IntegerDiscriminator; diff --git a/clang/lib/CodeGen/CodeGenTypeCache.h b/clang/lib/CodeGen/CodeGenTypeCache.h index e273ebe..015306b 100644 --- a/clang/lib/CodeGen/CodeGenTypeCache.h +++ b/clang/lib/CodeGen/CodeGenTypeCache.h @@ -53,7 +53,7 @@ struct CodeGenTypeCache { /// void*, void** in the target's default address space (often 0) union { - llvm::PointerType *UnqualPtrTy; + llvm::PointerType *DefaultPtrTy; llvm::PointerType *VoidPtrTy; llvm::PointerType *Int8PtrTy; llvm::PointerType *VoidPtrPtrTy; diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 9e195a9..65c4763 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -774,7 +774,7 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( } else { llvm::Value *VFPAddr = CGF.Builder.CreateGEP(CGF.Int8Ty, VTable, VTableOffset); - VirtualFn = CGF.Builder.CreateAlignedLoad(CGF.UnqualPtrTy, VFPAddr, + VirtualFn = CGF.Builder.CreateAlignedLoad(CGF.DefaultPtrTy, VFPAddr, CGF.getPointerAlign(), "memptr.virtualfn"); } @@ -816,7 +816,7 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( // function pointer. CGF.EmitBlock(FnNonVirtual); llvm::Value *NonVirtualFn = - Builder.CreateIntToPtr(FnAsInt, CGF.UnqualPtrTy, "memptr.nonvirtualfn"); + Builder.CreateIntToPtr(FnAsInt, CGF.DefaultPtrTy, "memptr.nonvirtualfn"); // Check the function pointer if CFI on member function pointers is enabled. if (ShouldEmitCFICheck) { @@ -856,7 +856,7 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( // We're done. CGF.EmitBlock(FnEnd); - llvm::PHINode *CalleePtr = Builder.CreatePHI(CGF.UnqualPtrTy, 2); + llvm::PHINode *CalleePtr = Builder.CreatePHI(CGF.DefaultPtrTy, 2); CalleePtr->addIncoming(VirtualFn, FnVirtual); CalleePtr->addIncoming(NonVirtualFn, FnNonVirtual); @@ -1403,7 +1403,7 @@ void ItaniumCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF, // Grab the vtable pointer as an intptr_t*. auto *ClassDecl = ElementType->castAsCXXRecordDecl(); - llvm::Value *VTable = CGF.GetVTablePtr(Ptr, CGF.UnqualPtrTy, ClassDecl); + llvm::Value *VTable = CGF.GetVTablePtr(Ptr, CGF.DefaultPtrTy, ClassDecl); // Track back to entry -2 and pull out the offset there. llvm::Value *OffsetPtr = CGF.Builder.CreateConstInBoundsGEP1_64( @@ -1749,7 +1749,7 @@ llvm::Value *ItaniumCXXABI::emitExactDynamicCast( auto AuthenticateVTable = [&](Address ThisAddr, const CXXRecordDecl *Decl) { if (!CGF.getLangOpts().PointerAuthCalls) return; - (void)CGF.GetVTablePtr(ThisAddr, CGF.UnqualPtrTy, Decl, + (void)CGF.GetVTablePtr(ThisAddr, CGF.DefaultPtrTy, Decl, CodeGenFunction::VTableAuthMode::MustTrap); }; @@ -1775,7 +1775,7 @@ llvm::Value *ItaniumCXXABI::emitExactDynamicCast( if (PerformPostCastAuthentication) VTable = CGF.EmitPointerAuthAuth(StrippingAuthInfo, VTable); } else - VTable = CGF.GetVTablePtr(ThisAddr, CGF.UnqualPtrTy, SrcDecl); + VTable = CGF.GetVTablePtr(ThisAddr, CGF.DefaultPtrTy, SrcDecl); // Compare the vptr against the expected vptr for the destination type at // this offset. @@ -1828,7 +1828,7 @@ llvm::Value *ItaniumCXXABI::emitDynamicCastToVoid(CodeGenFunction &CGF, if (CGM.getItaniumVTableContext().isRelativeLayout()) { // Get the vtable pointer. llvm::Value *VTable = - CGF.GetVTablePtr(ThisAddr, CGF.UnqualPtrTy, ClassDecl); + CGF.GetVTablePtr(ThisAddr, CGF.DefaultPtrTy, ClassDecl); // Get the offset-to-top from the vtable. OffsetToTop = @@ -1841,7 +1841,7 @@ llvm::Value *ItaniumCXXABI::emitDynamicCastToVoid(CodeGenFunction &CGF, // Get the vtable pointer. llvm::Value *VTable = - CGF.GetVTablePtr(ThisAddr, CGF.UnqualPtrTy, ClassDecl); + CGF.GetVTablePtr(ThisAddr, CGF.DefaultPtrTy, ClassDecl); // Get the offset-to-top from the vtable. OffsetToTop = @@ -2578,7 +2578,7 @@ llvm::Value *ItaniumCXXABI::readArrayCookieImpl(CodeGenFunction &CGF, // We can't simply ignore this load using nosanitize metadata because // the metadata may be lost. llvm::FunctionType *FTy = - llvm::FunctionType::get(CGF.SizeTy, CGF.UnqualPtrTy, false); + llvm::FunctionType::get(CGF.SizeTy, CGF.DefaultPtrTy, false); llvm::FunctionCallee F = CGM.CreateRuntimeFunction(FTy, "__asan_load_cxx_array_cookie"); return CGF.Builder.CreateCall(F, numElementsPtr.emitRawPointer(CGF)); @@ -2921,7 +2921,7 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, // We're assuming that the destructor function is something we can // reasonably call with the default CC. - llvm::Type *dtorTy = CGF.UnqualPtrTy; + llvm::Type *dtorTy = CGF.DefaultPtrTy; // Preserve address space of addr. auto AddrAS = addr ? addr->getType()->getPointerAddressSpace() : 0; @@ -5035,7 +5035,7 @@ static void InitCatchParam(CodeGenFunction &CGF, auto catchRD = CatchType->getAsCXXRecordDecl(); CharUnits caughtExnAlignment = CGF.CGM.getClassPointerAlignment(catchRD); - llvm::Type *PtrTy = CGF.UnqualPtrTy; // addrspace 0 ok + llvm::Type *PtrTy = CGF.DefaultPtrTy; // Check for a copy expression. If we don't have a copy expression, // that means a trivial copy is okay. @@ -5244,7 +5244,7 @@ void XLCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, llvm::FunctionCallee Dtor, llvm::Constant *Addr) { if (D.getTLSKind() != VarDecl::TLS_None) { - llvm::PointerType *PtrTy = CGF.UnqualPtrTy; + llvm::PointerType *PtrTy = CGF.DefaultPtrTy; // extern "C" int __pt_atexit_np(int flags, int(*)(int,...), ...); llvm::FunctionType *AtExitTy = diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 19d9265..71e2449 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -528,7 +528,7 @@ public: CGM.IntTy, CGM.IntTy, CGM.IntTy, - getImageRelativeType(CGM.UnqualPtrTy), + getImageRelativeType(CGM.DefaultPtrTy), }; BaseClassDescriptorType = llvm::StructType::create( CGM.getLLVMContext(), FieldTypes, "rtti.BaseClassDescriptor"); @@ -540,7 +540,7 @@ public: return ClassHierarchyDescriptorType; // Forward-declare RTTIClassHierarchyDescriptor to break a cycle. llvm::Type *FieldTypes[] = {CGM.IntTy, CGM.IntTy, CGM.IntTy, - getImageRelativeType(CGM.UnqualPtrTy)}; + getImageRelativeType(CGM.DefaultPtrTy)}; ClassHierarchyDescriptorType = llvm::StructType::create(FieldTypes, "rtti.ClassHierarchyDescriptor"); return ClassHierarchyDescriptorType; @@ -554,7 +554,7 @@ public: CGM.IntTy, CGM.IntTy, getImageRelativeType(CGM.Int8PtrTy), - getImageRelativeType(CGM.UnqualPtrTy), + getImageRelativeType(CGM.DefaultPtrTy), getImageRelativeType(CGM.VoidTy), }; llvm::ArrayRef<llvm::Type *> FieldTypesRef(FieldTypes); @@ -752,7 +752,7 @@ public: llvm::SmallString<23> CTATypeName("eh.CatchableTypeArray."); CTATypeName += llvm::utostr(NumEntries); - llvm::Type *CTType = getImageRelativeType(CGM.UnqualPtrTy); + llvm::Type *CTType = getImageRelativeType(CGM.DefaultPtrTy); llvm::Type *FieldTypes[] = { CGM.IntTy, // NumEntries llvm::ArrayType::get(CTType, NumEntries) // CatchableTypes @@ -779,7 +779,7 @@ public: llvm::FunctionCallee getThrowFn() { // _CxxThrowException is passed an exception object and a ThrowInfo object // which describes the exception. - llvm::Type *Args[] = {CGM.Int8PtrTy, CGM.UnqualPtrTy}; + llvm::Type *Args[] = {CGM.Int8PtrTy, CGM.DefaultPtrTy}; llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, Args, /*isVarArg=*/false); llvm::FunctionCallee Throw = @@ -920,7 +920,7 @@ void MicrosoftCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF, void MicrosoftCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) { llvm::Value *Args[] = {llvm::ConstantPointerNull::get(CGM.Int8PtrTy), - llvm::ConstantPointerNull::get(CGM.UnqualPtrTy)}; + llvm::ConstantPointerNull::get(CGM.DefaultPtrTy)}; llvm::FunctionCallee Fn = getThrowFn(); if (isNoReturn) CGF.EmitNoreturnRuntimeCallOrInvoke(Fn, Args); @@ -1969,13 +1969,13 @@ CGCallee MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, SourceLocation Loc) { CGBuilderTy &Builder = CGF.Builder; - Ty = CGF.UnqualPtrTy; + Ty = CGF.DefaultPtrTy; Address VPtr = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true); auto *MethodDecl = cast<CXXMethodDecl>(GD.getDecl()); llvm::Value *VTable = - CGF.GetVTablePtr(VPtr, CGF.UnqualPtrTy, MethodDecl->getParent()); + CGF.GetVTablePtr(VPtr, CGF.DefaultPtrTy, MethodDecl->getParent()); MicrosoftVTableContext &VFTContext = CGM.getMicrosoftVTableContext(); MethodVFTableLocation ML = VFTContext.getMethodVFTableLocation(GD); @@ -2136,9 +2136,9 @@ MicrosoftCXXABI::EmitVirtualMemPtrThunk(const CXXMethodDecl *MD, // Load the vfptr and then callee from the vftable. The callee should have // adjusted 'this' so that the vfptr is at offset zero. - llvm::Type *ThunkPtrTy = CGF.UnqualPtrTy; + llvm::Type *ThunkPtrTy = CGF.DefaultPtrTy; llvm::Value *VTable = - CGF.GetVTablePtr(getThisAddress(CGF), CGF.UnqualPtrTy, MD->getParent()); + CGF.GetVTablePtr(getThisAddress(CGF), CGF.DefaultPtrTy, MD->getParent()); llvm::Value *VFuncPtr = CGF.Builder.CreateConstInBoundsGEP1_64( ThunkPtrTy, VTable, ML.Index, "vfn"); @@ -2562,7 +2562,7 @@ static ConstantAddress getInitThreadEpochPtr(CodeGenModule &CGM) { static llvm::FunctionCallee getInitThreadHeaderFn(CodeGenModule &CGM) { llvm::FunctionType *FTy = llvm::FunctionType::get(llvm::Type::getVoidTy(CGM.getLLVMContext()), - CGM.UnqualPtrTy, /*isVarArg=*/false); + CGM.DefaultPtrTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction( FTy, "_Init_thread_header", llvm::AttributeList::get(CGM.getLLVMContext(), @@ -2574,7 +2574,7 @@ static llvm::FunctionCallee getInitThreadHeaderFn(CodeGenModule &CGM) { static llvm::FunctionCallee getInitThreadFooterFn(CodeGenModule &CGM) { llvm::FunctionType *FTy = llvm::FunctionType::get(llvm::Type::getVoidTy(CGM.getLLVMContext()), - CGM.UnqualPtrTy, /*isVarArg=*/false); + CGM.DefaultPtrTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction( FTy, "_Init_thread_footer", llvm::AttributeList::get(CGM.getLLVMContext(), @@ -2586,7 +2586,7 @@ static llvm::FunctionCallee getInitThreadFooterFn(CodeGenModule &CGM) { static llvm::FunctionCallee getInitThreadAbortFn(CodeGenModule &CGM) { llvm::FunctionType *FTy = llvm::FunctionType::get(llvm::Type::getVoidTy(CGM.getLLVMContext()), - CGM.UnqualPtrTy, /*isVarArg=*/false); + CGM.DefaultPtrTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction( FTy, "_Init_thread_abort", llvm::AttributeList::get(CGM.getLLVMContext(), @@ -3169,7 +3169,7 @@ MicrosoftCXXABI::GetVBaseOffsetFromVBPtr(CodeGenFunction &CGF, } llvm::Value *VBTable = - Builder.CreateAlignedLoad(CGM.UnqualPtrTy, VBPtr, VBPtrAlign, "vbtable"); + Builder.CreateAlignedLoad(CGM.DefaultPtrTy, VBPtr, VBPtrAlign, "vbtable"); // Translate from byte offset to table index. It improves analyzability. llvm::Value *VBTableIndex = Builder.CreateAShr( @@ -3825,7 +3825,7 @@ MSRTTIBuilder::getBaseClassArray(SmallVectorImpl<MSRTTIClass> &Classes) { // mode) bytes of padding. We provide a pointer sized amount of padding by // adding +1 to Classes.size(). The sections have pointer alignment and are // marked pick-any so it shouldn't matter. - llvm::Type *PtrType = ABI.getImageRelativeType(CGM.UnqualPtrTy); + llvm::Type *PtrType = ABI.getImageRelativeType(CGM.DefaultPtrTy); auto *ArrType = llvm::ArrayType::get(PtrType, Classes.size() + 1); auto *BCA = new llvm::GlobalVariable(Module, ArrType, @@ -4372,7 +4372,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getCatchableTypeArray(QualType T) { CatchableTypes.insert(getCatchableType(getContext().VoidPtrTy)); uint32_t NumEntries = CatchableTypes.size(); - llvm::Type *CTType = getImageRelativeType(CGM.UnqualPtrTy); + llvm::Type *CTType = getImageRelativeType(CGM.DefaultPtrTy); llvm::ArrayType *AT = llvm::ArrayType::get(CTType, NumEntries); llvm::StructType *CTAType = getCatchableTypeArrayType(NumEntries); llvm::Constant *Fields[] = { diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index 2429a43..60f9b86 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -2037,7 +2037,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vld1q_x3_v: case NEON::BI__builtin_neon_vld1_x4_v: case NEON::BI__builtin_neon_vld1q_x4_v: { - llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; + llvm::Type *Tys[2] = {VTy, DefaultPtrTy}; Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); @@ -2263,11 +2263,11 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( // in AArch64 it comes last. We may want to stick to one or another. if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be || Arch == llvm::Triple::aarch64_32) { - llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; + llvm::Type *Tys[2] = {VTy, DefaultPtrTy}; std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, ""); } - llvm::Type *Tys[2] = {UnqualPtrTy, VTy}; + llvm::Type *Tys[2] = {DefaultPtrTy, VTy}; return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, ""); } case NEON::BI__builtin_neon_vsubhn_v: { @@ -2858,7 +2858,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic( BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex, - UnqualPtrTy); + DefaultPtrTy); CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); Val->addParamAttr( 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy)); @@ -5225,7 +5225,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr, - UnqualPtrTy); + DefaultPtrTy); CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); Val->addParamAttr( 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy)); @@ -7482,42 +7482,42 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vld2_v: case NEON::BI__builtin_neon_vld2q_v: { - llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; + llvm::Type *Tys[2] = {VTy, DefaultPtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld3_v: case NEON::BI__builtin_neon_vld3q_v: { - llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; + llvm::Type *Tys[2] = {VTy, DefaultPtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld4_v: case NEON::BI__builtin_neon_vld4q_v: { - llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; + llvm::Type *Tys[2] = {VTy, DefaultPtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld2_dup_v: case NEON::BI__builtin_neon_vld2q_dup_v: { - llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; + llvm::Type *Tys[2] = {VTy, DefaultPtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld3_dup_v: case NEON::BI__builtin_neon_vld3q_dup_v: { - llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; + llvm::Type *Tys[2] = {VTy, DefaultPtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld4_dup_v: case NEON::BI__builtin_neon_vld4q_dup_v: { - llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; + llvm::Type *Tys[2] = {VTy, DefaultPtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp index e71dc9e..44d5938 100644 --- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp @@ -59,7 +59,7 @@ static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, Constraints += MachineClobbers; } - llvm::Type *PtrType = CGF.UnqualPtrTy; + llvm::Type *PtrType = CGF.DefaultPtrTy; llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false); llvm::InlineAsm *IA = diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp index 380e8c0..35e7655 100644 --- a/clang/lib/CodeGen/Targets/PPC.cpp +++ b/clang/lib/CodeGen/Targets/PPC.cpp @@ -490,7 +490,7 @@ RValue PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, llvm::Type *DirectTy = CGF.ConvertType(Ty), *ElementTy = DirectTy; if (isIndirect) - DirectTy = CGF.UnqualPtrTy; + DirectTy = CGF.DefaultPtrTy; // Case 1: consume registers. Address RegAddr = Address::invalid(); diff --git a/clang/lib/Driver/Distro.cpp b/clang/lib/Driver/Distro.cpp index 838e087..df10458 100644 --- a/clang/lib/Driver/Distro.cpp +++ b/clang/lib/Driver/Distro.cpp @@ -61,10 +61,6 @@ static Distro::DistroType DetectLsbRelease(llvm::vfs::FileSystem &VFS) { if (Version == Distro::UnknownDistro && Line.starts_with("DISTRIB_CODENAME=")) Version = llvm::StringSwitch<Distro::DistroType>(Line.substr(17)) - .Case("maverick", Distro::UbuntuMaverick) - .Case("natty", Distro::UbuntuNatty) - .Case("oneiric", Distro::UbuntuOneiric) - .Case("precise", Distro::UbuntuPrecise) .Case("quantal", Distro::UbuntuQuantal) .Case("raring", Distro::UbuntuRaring) .Case("saucy", Distro::UbuntuSaucy) @@ -120,13 +116,17 @@ static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS) { if (Data.starts_with("Fedora release")) return Distro::Fedora; if (Data.starts_with("Red Hat Enterprise Linux") || - Data.starts_with("CentOS") || Data.starts_with("Scientific Linux")) { + Data.starts_with("CentOS") || Data.starts_with("AlmaLinux") || + Data.starts_with("Rocky Linux") || + Data.starts_with("Scientific Linux")) { + if (Data.contains("release 10")) + return Distro::RHEL10; + if (Data.contains("release 9")) + return Distro::RHEL9; + if (Data.contains("release 8")) + return Distro::RHEL8; if (Data.contains("release 7")) return Distro::RHEL7; - else if (Data.contains("release 6")) - return Distro::RHEL6; - else if (Data.contains("release 5")) - return Distro::RHEL5; } return Distro::UnknownDistro; } @@ -139,12 +139,6 @@ static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS) { int MajorVersion; if (!Data.split('.').first.getAsInteger(10, MajorVersion)) { switch (MajorVersion) { - case 5: - return Distro::DebianLenny; - case 6: - return Distro::DebianSqueeze; - case 7: - return Distro::DebianWheezy; case 8: return Distro::DebianJessie; case 9: @@ -166,8 +160,6 @@ static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS) { } } return llvm::StringSwitch<Distro::DistroType>(Data.split("\n").first) - .Case("squeeze/sid", Distro::DebianSqueeze) - .Case("wheezy/sid", Distro::DebianWheezy) .Case("jessie/sid", Distro::DebianJessie) .Case("stretch/sid", Distro::DebianStretch) .Case("buster/sid", Distro::DebianBuster) diff --git a/clang/lib/Driver/ToolChains/Arch/Mips.cpp b/clang/lib/Driver/ToolChains/Arch/Mips.cpp index bac8681..227c6a0 100644 --- a/clang/lib/Driver/ToolChains/Arch/Mips.cpp +++ b/clang/lib/Driver/ToolChains/Arch/Mips.cpp @@ -482,9 +482,9 @@ bool mips::isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName, return false; return llvm::StringSwitch<bool>(CPUName) - .Cases("mips2", "mips3", "mips4", "mips5", true) - .Cases("mips32", "mips32r2", "mips32r3", "mips32r5", true) - .Cases("mips64", "mips64r2", "mips64r3", "mips64r5", true) + .Cases({"mips2", "mips3", "mips4", "mips5"}, true) + .Cases({"mips32", "mips32r2", "mips32r3", "mips32r5"}, true) + .Cases({"mips64", "mips64r2", "mips64r3", "mips64r5"}, true) .Default(false); } diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index d2356eb..cc5bcd1 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -51,15 +51,15 @@ llvm::Triple::ArchType darwin::getArchTypeForMachOArchName(StringRef Str) { // translation. return llvm::StringSwitch<llvm::Triple::ArchType>(Str) - .Cases("i386", "i486", "i486SX", "i586", "i686", llvm::Triple::x86) - .Cases("pentium", "pentpro", "pentIIm3", "pentIIm5", "pentium4", + .Cases({"i386", "i486", "i486SX", "i586", "i686"}, llvm::Triple::x86) + .Cases({"pentium", "pentpro", "pentIIm3", "pentIIm5", "pentium4"}, llvm::Triple::x86) - .Cases("x86_64", "x86_64h", llvm::Triple::x86_64) + .Cases({"x86_64", "x86_64h"}, llvm::Triple::x86_64) // This is derived from the driver. - .Cases("arm", "armv4t", "armv5", "armv6", "armv6m", llvm::Triple::arm) - .Cases("armv7", "armv7em", "armv7k", "armv7m", llvm::Triple::arm) - .Cases("armv7s", "xscale", llvm::Triple::arm) - .Cases("arm64", "arm64e", llvm::Triple::aarch64) + .Cases({"arm", "armv4t", "armv5", "armv6", "armv6m"}, llvm::Triple::arm) + .Cases({"armv7", "armv7em", "armv7k", "armv7m"}, llvm::Triple::arm) + .Cases({"armv7s", "xscale"}, llvm::Triple::arm) + .Cases({"arm64", "arm64e"}, llvm::Triple::aarch64) .Case("arm64_32", llvm::Triple::aarch64_32) .Case("r600", llvm::Triple::r600) .Case("amdgcn", llvm::Triple::amdgcn) diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp index b004d73..65fc65e 100644 --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -506,15 +506,15 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, MatchedIndices.clear(); }; - unsigned i = StartAt; - for (unsigned e = Changes.size(); i != e; ++i) { - auto &CurrentChange = Changes[i]; + unsigned I = StartAt; + for (unsigned E = Changes.size(); I != E; ++I) { + auto &CurrentChange = Changes[I]; if (CurrentChange.indentAndNestingLevel() < IndentAndNestingLevel) break; if (CurrentChange.NewlinesBefore != 0) { CommasBeforeMatch = 0; - EndOfSequence = i; + EndOfSequence = I; // Whether to break the alignment sequence because of an empty line. bool EmptyLineBreak = @@ -530,8 +530,8 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, // A new line starts, re-initialize line status tracking bools. // Keep the match state if a string literal is continued on this line. - if (i == 0 || CurrentChange.Tok->isNot(tok::string_literal) || - Changes[i - 1].Tok->isNot(tok::string_literal)) { + if (I == 0 || CurrentChange.Tok->isNot(tok::string_literal) || + Changes[I - 1].Tok->isNot(tok::string_literal)) { FoundMatchOnLine = false; } LineIsComment = true; @@ -547,8 +547,8 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, IndentAndNestingLevel) { // Call AlignTokens recursively, skipping over this scope block. const auto StoppedAt = - AlignTokens(Style, Matches, Changes, i, ACS, RightJustify); - i = StoppedAt - 1; + AlignTokens(Style, Matches, Changes, I, ACS, RightJustify); + I = StoppedAt - 1; continue; } } @@ -559,7 +559,7 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, // If there is more than one matching token per line, or if the number of // preceding commas, do not match anymore, end the sequence. if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch) { - MatchedIndices.push_back(i); + MatchedIndices.push_back(I); AlignCurrentSequence(); } @@ -567,29 +567,69 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, FoundMatchOnLine = true; if (StartOfSequence == 0) - StartOfSequence = i; + StartOfSequence = I; unsigned ChangeWidthLeft = CurrentChange.StartOfTokenColumn; unsigned ChangeWidthAnchor = 0; unsigned ChangeWidthRight = 0; + unsigned CurrentChangeWidthRight = 0; if (RightJustify) if (ACS.PadOperators) ChangeWidthAnchor = CurrentChange.TokenLength; else ChangeWidthLeft += CurrentChange.TokenLength; else - ChangeWidthRight = CurrentChange.TokenLength; - for (unsigned j = i + 1; j != e && Changes[j].NewlinesBefore == 0; ++j) { - ChangeWidthRight += Changes[j].Spaces; + CurrentChangeWidthRight = CurrentChange.TokenLength; + const FormatToken *MatchingParenToEncounter = nullptr; + for (unsigned J = I + 1; + J != E && (Changes[J].NewlinesBefore == 0 || MatchingParenToEncounter); + ++J) { + const auto &Change = Changes[J]; + const auto *Tok = Change.Tok; + + if (Tok->MatchingParen) { + if (Tok->isOneOf(tok::l_paren, tok::l_brace, tok::l_square, + TT_TemplateOpener) && + !MatchingParenToEncounter) { + // If the next token is on the next line, we probably don't need to + // check the following lengths, because it most likely isn't aligned + // with the rest. + if (J + 1 != E && Changes[J + 1].NewlinesBefore == 0) + MatchingParenToEncounter = Tok->MatchingParen; + } else if (MatchingParenToEncounter == Tok->MatchingParen) { + MatchingParenToEncounter = nullptr; + } + } + + if (Change.NewlinesBefore != 0) { + ChangeWidthRight = std::max(ChangeWidthRight, CurrentChangeWidthRight); + const auto ChangeWidthStart = ChangeWidthLeft + ChangeWidthAnchor; + // If the position of the current token is columnwise before the begin + // of the alignment, we drop out here, because the next line does not + // have to be moved with the previous one(s) for the alignment. E.g.: + // int i1 = 1; | <- ColumnLimit | int i1 = 1; + // int j = 0; | Without the break -> | int j = 0; + // int k = bar( | We still want to align the = | int k = bar( + // argument1, | here, even if we can't move | argument1, + // argument2); | the following lines. | argument2); + if (static_cast<unsigned>(Change.Spaces) < ChangeWidthStart) + break; + CurrentChangeWidthRight = Change.Spaces - ChangeWidthStart; + } else { + CurrentChangeWidthRight += Change.Spaces; + } + // Changes are generally 1:1 with the tokens, but a change could also be // inside of a token, in which case it's counted more than once: once for // the whitespace surrounding the token (!IsInsideToken) and once for // each whitespace change within it (IsInsideToken). // Therefore, changes inside of a token should only count the space. - if (!Changes[j].IsInsideToken) - ChangeWidthRight += Changes[j].TokenLength; + if (!Change.IsInsideToken) + CurrentChangeWidthRight += Change.TokenLength; } + ChangeWidthRight = std::max(ChangeWidthRight, CurrentChangeWidthRight); + // If we are restricted by the maximum column width, end the sequence. unsigned NewLeft = std::max(ChangeWidthLeft, WidthLeft); unsigned NewAnchor = std::max(ChangeWidthAnchor, WidthAnchor); @@ -598,7 +638,7 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, if (Style.ColumnLimit != 0 && Style.ColumnLimit < NewLeft + NewAnchor + NewRight) { AlignCurrentSequence(); - StartOfSequence = i; + StartOfSequence = I; WidthLeft = ChangeWidthLeft; WidthAnchor = ChangeWidthAnchor; WidthRight = ChangeWidthRight; @@ -607,12 +647,12 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, WidthAnchor = NewAnchor; WidthRight = NewRight; } - MatchedIndices.push_back(i); + MatchedIndices.push_back(I); } - EndOfSequence = i; + EndOfSequence = I; AlignCurrentSequence(); - return i; + return I; } // Aligns a sequence of matching tokens, on the MinColumn column. diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index 1b63c40..0daa20a 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -629,7 +629,7 @@ static std::error_code collectModuleHeaderIncludes( // Check whether this entry has an extension typically associated with // headers. if (!llvm::StringSwitch<bool>(llvm::sys::path::extension(Dir->path())) - .Cases(".h", ".H", ".hh", ".hpp", true) + .Cases({".h", ".H", ".hh", ".hpp"}, true) .Default(false)) continue; diff --git a/clang/lib/Headers/__clang_hip_runtime_wrapper.h b/clang/lib/Headers/__clang_hip_runtime_wrapper.h index fb0ece9..19ce7a5 100644 --- a/clang/lib/Headers/__clang_hip_runtime_wrapper.h +++ b/clang/lib/Headers/__clang_hip_runtime_wrapper.h @@ -26,6 +26,7 @@ #define __managed__ __attribute__((managed)) #define __cluster_dims__(...) __attribute__((cluster_dims(__VA_ARGS__))) +#define __no_cluster__ __attribute__((no_cluster)) #if !defined(__cplusplus) || __cplusplus < 201103L #define nullptr NULL; diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index d35bc0e..fdb825f 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -1298,9 +1298,8 @@ _mm256_min_epu32(__m256i __a, __m256i __b) { /// \param __a /// A 256-bit integer vector containing the source bytes. /// \returns The 32-bit integer mask. -static __inline__ int __DEFAULT_FN_ATTRS256 -_mm256_movemask_epi8(__m256i __a) -{ +static __inline__ int __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_movemask_epi8(__m256i __a) { return __builtin_ia32_pmovmskb256((__v32qi)__a); } diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index 3681cca..fef1a2d 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -1200,10 +1200,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { (__v8di)_mm512_setzero_si512()); } -#define _mm512_extractf32x8_ps(A, imm) \ - ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ - (__v8sf)_mm256_undefined_ps(), \ - (__mmask8)-1)) +#define _mm512_extractf32x8_ps(A, imm) \ + ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8) - 1)) #define _mm512_mask_extractf32x8_ps(W, U, A, imm) \ ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ @@ -1215,11 +1215,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U))) -#define _mm512_extractf64x2_pd(A, imm) \ - ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ - (int)(imm), \ - (__v2df)_mm_undefined_pd(), \ - (__mmask8)-1)) +#define _mm512_extractf64x2_pd(A, imm) \ + ((__m128d)__builtin_ia32_extractf64x2_512_mask( \ + (__v8df)(__m512d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \ + (__mmask8) - 1)) #define _mm512_mask_extractf64x2_pd(W, U, A, imm) \ ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ @@ -1233,10 +1232,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { (__v2df)_mm_setzero_pd(), \ (__mmask8)(U))) -#define _mm512_extracti32x8_epi32(A, imm) \ - ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ - (__v8si)_mm256_undefined_si256(), \ - (__mmask8)-1)) +#define _mm512_extracti32x8_epi32(A, imm) \ + ((__m256i)__builtin_ia32_extracti32x8_mask( \ + (__v16si)(__m512i)(A), (int)(imm), (__v8si)_mm256_setzero_si256(), \ + (__mmask8) - 1)) #define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \ ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ @@ -1248,11 +1247,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { (__v8si)_mm256_setzero_si256(), \ (__mmask8)(U))) -#define _mm512_extracti64x2_epi64(A, imm) \ - ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ - (int)(imm), \ - (__v2di)_mm_undefined_si128(), \ - (__mmask8)-1)) +#define _mm512_extracti64x2_epi64(A, imm) \ + ((__m128i)__builtin_ia32_extracti64x2_512_mask( \ + (__v8di)(__m512i)(A), (int)(imm), (__v2di)_mm_setzero_si128(), \ + (__mmask8) - 1)) #define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \ ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 07de036..18c4a44 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -3156,10 +3156,10 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, (__v16si)_mm512_setzero_si512())) /* Vector Extract */ -#define _mm512_extractf64x4_pd(A, I) \ - ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \ - (__v4df)_mm256_undefined_pd(), \ - (__mmask8)-1)) +#define _mm512_extractf64x4_pd(A, I) \ + ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \ + (__v4df)_mm256_setzero_pd(), \ + (__mmask8) - 1)) #define _mm512_mask_extractf64x4_pd(W, U, A, imm) \ ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \ @@ -3171,10 +3171,10 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, (__v4df)_mm256_setzero_pd(), \ (__mmask8)(U))) -#define _mm512_extractf32x4_ps(A, I) \ - ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \ - (__v4sf)_mm_undefined_ps(), \ - (__mmask8)-1)) +#define _mm512_extractf32x4_ps(A, I) \ + ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8) - 1)) #define _mm512_mask_extractf32x4_ps(W, U, A, imm) \ ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ @@ -7089,10 +7089,10 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); } -#define _mm512_extracti32x4_epi32(A, imm) \ - ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ - (__v4si)_mm_undefined_si128(), \ - (__mmask8)-1)) +#define _mm512_extracti32x4_epi32(A, imm) \ + ((__m128i)__builtin_ia32_extracti32x4_mask( \ + (__v16si)(__m512i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \ + (__mmask8) - 1)) #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \ ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ @@ -7104,10 +7104,10 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) (__v4si)_mm_setzero_si128(), \ (__mmask8)(U))) -#define _mm512_extracti64x4_epi64(A, imm) \ +#define _mm512_extracti64x4_epi64(A, imm) \ ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ - (__v4di)_mm256_undefined_si256(), \ - (__mmask8)-1)) + (__v4di)_mm256_setzero_si256(), \ + (__mmask8) - 1)) #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \ ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h index ee7974e..707d039 100644 --- a/clang/lib/Headers/avx512vldqintrin.h +++ b/clang/lib/Headers/avx512vldqintrin.h @@ -1062,11 +1062,10 @@ _mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { (__v4di)_mm256_setzero_si256()); } -#define _mm256_extractf64x2_pd(A, imm) \ - ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ - (int)(imm), \ - (__v2df)_mm_undefined_pd(), \ - (__mmask8)-1)) +#define _mm256_extractf64x2_pd(A, imm) \ + ((__m128d)__builtin_ia32_extractf64x2_256_mask( \ + (__v4df)(__m256d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \ + (__mmask8) - 1)) #define _mm256_mask_extractf64x2_pd(W, U, A, imm) \ ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ @@ -1080,11 +1079,10 @@ _mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { (__v2df)_mm_setzero_pd(), \ (__mmask8)(U))) -#define _mm256_extracti64x2_epi64(A, imm) \ - ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ - (int)(imm), \ - (__v2di)_mm_undefined_si128(), \ - (__mmask8)-1)) +#define _mm256_extracti64x2_epi64(A, imm) \ + ((__m128i)__builtin_ia32_extracti64x2_256_mask( \ + (__v4di)(__m256i)(A), (int)(imm), (__v2di)_mm_setzero_si128(), \ + (__mmask8) - 1)) #define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \ ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 676b5a0..92bb444 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -7545,11 +7545,10 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); } -#define _mm256_extractf32x4_ps(A, imm) \ - ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ - (int)(imm), \ - (__v4sf)_mm_undefined_ps(), \ - (__mmask8)-1)) +#define _mm256_extractf32x4_ps(A, imm) \ + ((__m128)__builtin_ia32_extractf32x4_256_mask( \ + (__v8sf)(__m256)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \ + (__mmask8) - 1)) #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \ ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ @@ -7563,11 +7562,10 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U))) -#define _mm256_extracti32x4_epi32(A, imm) \ - ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ - (int)(imm), \ - (__v4si)_mm_undefined_si128(), \ - (__mmask8)-1)) +#define _mm256_extracti32x4_epi32(A, imm) \ + ((__m128i)__builtin_ia32_extracti32x4_256_mask( \ + (__v8si)(__m256i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \ + (__mmask8) - 1)) #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \ ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 696ec31..4aef924 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -2941,9 +2941,8 @@ _mm256_testnzc_si256(__m256i __a, __m256i __b) { /// A 256-bit vector of [4 x double] containing the double-precision /// floating point values with sign bits to be extracted. /// \returns The sign bits from the operand, written to bits [3:0]. -static __inline int __DEFAULT_FN_ATTRS -_mm256_movemask_pd(__m256d __a) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_movemask_pd(__m256d __a) { return __builtin_ia32_movmskpd256((__v4df)__a); } @@ -2959,9 +2958,8 @@ _mm256_movemask_pd(__m256d __a) /// A 256-bit vector of [8 x float] containing the single-precision floating /// point values with sign bits to be extracted. /// \returns The sign bits from the operand, written to bits [7:0]. -static __inline int __DEFAULT_FN_ATTRS -_mm256_movemask_ps(__m256 __a) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_movemask_ps(__m256 __a) { return __builtin_ia32_movmskps256((__v8sf)__a); } diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 454e9a2..dbe5ca0 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -4280,7 +4280,8 @@ _mm_packus_epi16(__m128i __a, __m128i __b) { /// A 128-bit integer vector containing the values with bits to be extracted. /// \returns The most significant bits from each 8-bit element in \a __a, /// written to bits [15:0]. The other bits are assigned zeros. -static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) { +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_movemask_epi8(__m128i __a) { return __builtin_ia32_pmovmskb128((__v16qi)__a); } @@ -4699,7 +4700,8 @@ _mm_unpacklo_pd(__m128d __a, __m128d __b) { /// be extracted. /// \returns The sign bits from each of the double-precision elements in \a __a, /// written to bits [1:0]. The remaining bits are assigned values of zero. -static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) { +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_movemask_pd(__m128d __a) { return __builtin_ia32_movmskpd((__v2df)__a); } diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index 4f197d5..511a135 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -1524,7 +1524,8 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2) { /// \returns A 128-bit value where bits [15:0] contain the minimum value found /// in parameter \a __V, bits [18:16] contain the index of the minimum value /// and the remaining bits are set to 0. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_minpos_epu16(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_minpos_epu16(__m128i __V) { return (__m128i)__builtin_ia32_phminposuw128((__v8hi)__V); } diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index 605409c..fe6afdc 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -3014,9 +3014,7 @@ _mm_cvtps_pi8(__m128 __a) /// \returns A 32-bit integer value. Bits [3:0] contain the sign bits from each /// single-precision floating-point element of the parameter. Bits [31:4] are /// set to zero. -static __inline__ int __DEFAULT_FN_ATTRS -_mm_movemask_ps(__m128 __a) -{ +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_ps(__m128 __a) { return __builtin_ia32_movmskps((__v4sf)__a); } diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp index d8f61c0..b014124 100644 --- a/clang/lib/Lex/PPLexerChange.cpp +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -302,7 +302,7 @@ void Preprocessor::diagnoseMissingHeaderInUmbrellaDir(const Module &Mod) { // Check whether this entry has an extension typically associated with // headers. if (!StringSwitch<bool>(llvm::sys::path::extension(Entry->path())) - .Cases(".h", ".H", ".hh", ".hpp", true) + .Cases({".h", ".H", ".hh", ".hpp"}, true) .Default(false)) continue; diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp index 9893381..7c2b928 100644 --- a/clang/lib/Parse/ParsePragma.cpp +++ b/clang/lib/Parse/ParsePragma.cpp @@ -1419,10 +1419,11 @@ bool Parser::HandlePragmaLoopHint(LoopHint &Hint) { // Return a valid hint if pragma unroll or nounroll were specified // without an argument. - auto IsLoopHint = llvm::StringSwitch<bool>(PragmaNameInfo->getName()) - .Cases("unroll", "nounroll", "unroll_and_jam", - "nounroll_and_jam", true) - .Default(false); + auto IsLoopHint = + llvm::StringSwitch<bool>(PragmaNameInfo->getName()) + .Cases({"unroll", "nounroll", "unroll_and_jam", "nounroll_and_jam"}, + true) + .Default(false); if (Toks.empty() && IsLoopHint) { ConsumeAnnotationToken(); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index ef1be23..2990fd6 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -6926,13 +6926,13 @@ StringRef Sema::GetFormatStringTypeName(FormatStringType FST) { FormatStringType Sema::GetFormatStringType(StringRef Flavor) { return llvm::StringSwitch<FormatStringType>(Flavor) - .Cases("gnu_scanf", "scanf", FormatStringType::Scanf) - .Cases("gnu_printf", "printf", "printf0", "syslog", + .Cases({"gnu_scanf", "scanf"}, FormatStringType::Scanf) + .Cases({"gnu_printf", "printf", "printf0", "syslog"}, FormatStringType::Printf) - .Cases("NSString", "CFString", FormatStringType::NSString) - .Cases("gnu_strftime", "strftime", FormatStringType::Strftime) - .Cases("gnu_strfmon", "strfmon", FormatStringType::Strfmon) - .Cases("kprintf", "cmn_err", "vcmn_err", "zcmn_err", + .Cases({"NSString", "CFString"}, FormatStringType::NSString) + .Cases({"gnu_strftime", "strftime"}, FormatStringType::Strftime) + .Cases({"gnu_strfmon", "strfmon"}, FormatStringType::Strfmon) + .Cases({"kprintf", "cmn_err", "vcmn_err", "zcmn_err"}, FormatStringType::Kprintf) .Case("freebsd_kprintf", FormatStringType::FreeBSDKPrintf) .Case("os_trace", FormatStringType::OSLog) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index dca9d6e..a50c276 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -12811,7 +12811,7 @@ QualType Sema::CheckCompareOperands(ExprResult &LHS, ExprResult &RHS, if (auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(DC)) { if (CTSD->isInStdNamespace() && llvm::StringSwitch<bool>(CTSD->getName()) - .Cases("less", "less_equal", "greater", "greater_equal", true) + .Cases({"less", "less_equal", "greater", "greater_equal"}, true) .Default(false)) { if (RHSType->isNullPtrType()) RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index f7974eb..7debe33 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -672,11 +672,12 @@ ExprResult InitListChecker::PerformEmptyInit(SourceLocation Loc, IsInStd = true; } - if (IsInStd && llvm::StringSwitch<bool>(R->getName()) - .Cases("basic_string", "deque", "forward_list", true) - .Cases("list", "map", "multimap", "multiset", true) - .Cases("priority_queue", "queue", "set", "stack", true) - .Cases("unordered_map", "unordered_set", "vector", true) + if (IsInStd && + llvm::StringSwitch<bool>(R->getName()) + .Cases({"basic_string", "deque", "forward_list"}, true) + .Cases({"list", "map", "multimap", "multiset"}, true) + .Cases({"priority_queue", "queue", "set", "stack"}, true) + .Cases({"unordered_map", "unordered_set", "vector"}, true) .Default(false)) { InitSeq.InitializeFrom( SemaRef, Entity, diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index ca99834..3bb8080 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -2996,6 +2996,8 @@ bool SemaOpenACC::CreateReductionCombinerRecipe( case OpenACCReductionOperator::Max: case OpenACCReductionOperator::Min: + BinOp = BinaryOperatorKind::BO_LT; + break; case OpenACCReductionOperator::And: case OpenACCReductionOperator::Or: // We just want a 'NYI' error in the backend, so leave an empty combiner @@ -3011,26 +3013,80 @@ bool SemaOpenACC::CreateReductionCombinerRecipe( assert(!VarTy->isArrayType() && "Only 1 level of array allowed"); + enum class CombinerFailureKind { + None = 0, + BinOp = 1, + Conditional = 2, + Assignment = 3, + }; + + auto genCombiner = [&, this](DeclRefExpr *LHSDRE, DeclRefExpr *RHSDRE) + -> std::pair<ExprResult, CombinerFailureKind> { + ExprResult BinOpRes = + SemaRef.BuildBinOp(SemaRef.getCurScope(), Loc, BinOp, LHSDRE, RHSDRE, + /*ForFoldExpr=*/false); + switch (ReductionOperator) { + case OpenACCReductionOperator::Addition: + case OpenACCReductionOperator::Multiplication: + case OpenACCReductionOperator::BitwiseAnd: + case OpenACCReductionOperator::BitwiseOr: + case OpenACCReductionOperator::BitwiseXOr: + // These 5 are simple and are being done as compound operators, so we can + // immediately quit here. + return {BinOpRes, BinOpRes.isUsable() ? CombinerFailureKind::None + : CombinerFailureKind::BinOp}; + case OpenACCReductionOperator::Max: + case OpenACCReductionOperator::Min: { + // These are done as: + // LHS = (LHS < RHS) ? LHS : RHS; and LHS = (LHS < RHS) ? RHS : LHS; + // + // The BinOpRes should have been created with the less-than, so we just + // have to build the conditional and assignment. + if (!BinOpRes.isUsable()) + return {BinOpRes, CombinerFailureKind::BinOp}; + + // Create the correct conditional operator, swapping the results + // (true/false value) depending on min/max. + ExprResult CondRes; + if (ReductionOperator == OpenACCReductionOperator::Min) + CondRes = SemaRef.ActOnConditionalOp(Loc, Loc, BinOpRes.get(), LHSDRE, + RHSDRE); + else + CondRes = SemaRef.ActOnConditionalOp(Loc, Loc, BinOpRes.get(), RHSDRE, + LHSDRE); + + if (!CondRes.isUsable()) + return {CondRes, CombinerFailureKind::Conditional}; + + // Build assignment. + ExprResult Assignment = SemaRef.BuildBinOp(SemaRef.getCurScope(), Loc, + BinaryOperatorKind::BO_Assign, + LHSDRE, CondRes.get(), + /*ForFoldExpr=*/false); + return {Assignment, Assignment.isUsable() + ? CombinerFailureKind::None + : CombinerFailureKind::Assignment}; + } + case OpenACCReductionOperator::And: + case OpenACCReductionOperator::Or: + llvm_unreachable("And/Or not implemented, but should fail earlier"); + case OpenACCReductionOperator::Invalid: + llvm_unreachable("Invalid should have been caught above"); + } + }; + auto tryCombiner = [&, this](DeclRefExpr *LHSDRE, DeclRefExpr *RHSDRE, bool IncludeTrap) { - // TODO: OpenACC: we have to figure out based on the bin-op how to do the - // ones that we can't just use compound operators for. So &&, ||, max, and - // min aren't really clear what we could do here. if (IncludeTrap) { // Trap all of the errors here, we'll emit our own at the end. Sema::TentativeAnalysisScope Trap{SemaRef}; - - return SemaRef.BuildBinOp(SemaRef.getCurScope(), Loc, BinOp, LHSDRE, - RHSDRE, - /*ForFoldExpr=*/false); - } else { - return SemaRef.BuildBinOp(SemaRef.getCurScope(), Loc, BinOp, LHSDRE, - RHSDRE, - /*ForFoldExpr=*/false); + return genCombiner(LHSDRE, RHSDRE); } + return genCombiner(LHSDRE, RHSDRE); }; struct CombinerAttemptTy { + CombinerFailureKind FailKind; VarDecl *LHS; DeclRefExpr *LHSDRE; VarDecl *RHS; @@ -3058,9 +3114,11 @@ bool SemaOpenACC::CreateReductionCombinerRecipe( RHSDecl->getBeginLoc()}, Ty, clang::VK_LValue, RHSDecl, nullptr, NOUR_None); - ExprResult BinOpResult = tryCombiner(LHSDRE, RHSDRE, /*IncludeTrap=*/true); + std::pair<ExprResult, CombinerFailureKind> BinOpResult = + tryCombiner(LHSDRE, RHSDRE, /*IncludeTrap=*/true); - return {LHSDecl, LHSDRE, RHSDecl, RHSDRE, BinOpResult.get()}; + return {BinOpResult.second, LHSDecl, LHSDRE, RHSDecl, RHSDRE, + BinOpResult.first.get()}; }; CombinerAttemptTy TopLevelCombinerInfo = formCombiner(VarTy); @@ -3081,12 +3139,20 @@ bool SemaOpenACC::CreateReductionCombinerRecipe( } } + auto EmitFailureNote = [&](CombinerFailureKind CFK) { + if (CFK == CombinerFailureKind::BinOp) + return Diag(Loc, diag::note_acc_reduction_combiner_forming) + << CFK << BinaryOperator::getOpcodeStr(BinOp); + return Diag(Loc, diag::note_acc_reduction_combiner_forming) << CFK; + }; + // Since the 'root' level didn't fail, the only thing that could be successful // is a struct that we decompose on its individual fields. RecordDecl *RD = VarTy->getAsRecordDecl(); if (!RD) { Diag(Loc, diag::err_acc_reduction_recipe_no_op) << VarTy; + EmitFailureNote(TopLevelCombinerInfo.FailKind); tryCombiner(TopLevelCombinerInfo.LHSDRE, TopLevelCombinerInfo.RHSDRE, /*IncludeTrap=*/false); return true; @@ -3098,6 +3164,7 @@ bool SemaOpenACC::CreateReductionCombinerRecipe( if (!FieldCombinerInfo.Op || FieldCombinerInfo.Op->containsErrors()) { Diag(Loc, diag::err_acc_reduction_recipe_no_op) << FD->getType(); Diag(FD->getBeginLoc(), diag::note_acc_reduction_recipe_noop_field) << RD; + EmitFailureNote(FieldCombinerInfo.FailKind); tryCombiner(FieldCombinerInfo.LHSDRE, FieldCombinerInfo.RHSDRE, /*IncludeTrap=*/false); return true; diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp index 50acc83..27fd556 100644 --- a/clang/lib/Sema/SemaStmtAttr.cpp +++ b/clang/lib/Sema/SemaStmtAttr.cpp @@ -81,7 +81,7 @@ static Attr *handleLoopHintAttr(Sema &S, Stmt *St, const ParsedAttr &A, StringRef PragmaName = llvm::StringSwitch<StringRef>( PragmaNameLoc->getIdentifierInfo()->getName()) - .Cases("unroll", "nounroll", "unroll_and_jam", "nounroll_and_jam", + .Cases({"unroll", "nounroll", "unroll_and_jam", "nounroll_and_jam"}, PragmaNameLoc->getIdentifierInfo()->getName()) .Default("clang loop"); diff --git a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp index f6a3e79..871400e 100644 --- a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp +++ b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp @@ -723,6 +723,7 @@ AnalysisConsumer::getModeForDecl(Decl *D, AnalysisMode Mode) { } static UnsignedEPStat PathRunningTime("PathRunningTime"); +static UnsignedEPStat SyntaxRunningTime("SyntaxRunningTime"); void AnalysisConsumer::HandleCode(Decl *D, AnalysisMode Mode, ExprEngine::InliningModes IMode, @@ -761,6 +762,8 @@ void AnalysisConsumer::HandleCode(Decl *D, AnalysisMode Mode, SyntaxCheckTimer->stopTimer(); llvm::TimeRecord CheckerEndTime = SyntaxCheckTimer->getTotalTime(); CheckerEndTime -= CheckerStartTime; + FunctionSummaries.findOrInsertSummary(D)->second.SyntaxRunningTime = + std::lround(CheckerEndTime.getWallTime() * 1000); DisplayTime(CheckerEndTime); if (AnalyzerTimers && ShouldClearTimersToPreventDisplayingThem) { AnalyzerTimers->clear(); @@ -792,11 +795,23 @@ void AnalysisConsumer::RunPathSensitiveChecks(Decl *D, if (!CFG) return; + CFGSize.set(CFG->size()); + + auto *DeclContext = Mgr->getAnalysisDeclContext(D); // See if the LiveVariables analysis scales. - if (!Mgr->getAnalysisDeclContext(D)->getAnalysis<RelaxedLiveVariables>()) + if (!DeclContext->getAnalysis<RelaxedLiveVariables>()) return; - CFGSize.set(CFG->size()); + // DeclContext declaration is the redeclaration of D that has a body. + const Decl *DefDecl = DeclContext->getDecl(); + + // Get the SyntaxRunningTime from the function summary, because it is computed + // during the AM_Syntax analysis, which is done at a different point in time + // and in different order, but always before AM_Path. + if (const auto *Summary = FunctionSummaries.findSummary(DefDecl); + Summary && Summary->SyntaxRunningTime.has_value()) { + SyntaxRunningTime.set(*Summary->SyntaxRunningTime); + } ExprEngine Eng(CTU, *Mgr, VisitedCallees, &FunctionSummaries, IMode); diff --git a/clang/test/AST/ByteCode/c.c b/clang/test/AST/ByteCode/c.c index 657a920..cfdc9d0 100644 --- a/clang/test/AST/ByteCode/c.c +++ b/clang/test/AST/ByteCode/c.c @@ -372,3 +372,12 @@ void discardedCmp(void) /// ArraySubscriptExpr that's not an lvalue typedef unsigned char U __attribute__((vector_size(1))); void nonLValueASE(U f) { f[0] = f[((U)(U){0})[0]]; } + +static char foo_(a) // all-warning {{definition without a prototype}} + char a; +{ + return 'a'; +} +static void bar_(void) { + foo_(foo_(1)); +} diff --git a/clang/test/Analysis/analyzer-stats/entry-point-stats.cpp b/clang/test/Analysis/analyzer-stats/entry-point-stats.cpp index ebbc015..3ff3bb1 100644 --- a/clang/test/Analysis/analyzer-stats/entry-point-stats.cpp +++ b/clang/test/Analysis/analyzer-stats/entry-point-stats.cpp @@ -10,6 +10,7 @@ // CHECK-NEXT: "DebugName": "fib(unsigned int)", // CHECK-NEXT: "CFGSize": "5", // CHECK-NEXT: "PathRunningTime": "{{[0-9]+}}", +// CHECK-NEXT: "SyntaxRunningTime": "{{[0-9]+}}", // CHECK-NEXT: "MaxBugClassSize": "{{[0-9]+}}", // CHECK-NEXT: "MaxQueueSize": "{{[0-9]+}}", // CHECK-NEXT: "MaxReachableSize": "{{[0-9]+}}", @@ -47,6 +48,7 @@ // CHECK-NEXT: "DebugName": "main(int, char **)", // CHECK-NEXT: "CFGSize": "3", // CHECK-NEXT: "PathRunningTime": "{{[0-9]+}}", +// CHECK-NEXT: "SyntaxRunningTime": "{{[0-9]+}}", // CHECK-NEXT: "MaxBugClassSize": "{{[0-9]+}}", // CHECK-NEXT: "MaxQueueSize": "{{[0-9]+}}", // CHECK-NEXT: "MaxReachableSize": "{{[0-9]+}}", diff --git a/clang/test/Analysis/z3-crosscheck-max-attempts.cpp b/clang/test/Analysis/z3-crosscheck-max-attempts.cpp index 8439236..ab66fe9 100644 --- a/clang/test/Analysis/z3-crosscheck-max-attempts.cpp +++ b/clang/test/Analysis/z3-crosscheck-max-attempts.cpp @@ -3,10 +3,10 @@ // RUN: | FileCheck %s --match-full-lines // CHECK: crosscheck-with-z3-max-attempts-per-query = 3 -// DEFINE: %{mocked_clang} = \ -// DEFINE: LD_PRELOAD="%llvmshlibdir/MockZ3SolverCheck%pluginext" \ -// DEFINE: %clang_analyze_cc1 %s \ -// DEFINE: -analyzer-config crosscheck-with-z3=true \ +// DEFINE: %{mocked_clang} = \ +// DEFINE: env LD_PRELOAD="%llvmshlibdir/MockZ3SolverCheck%pluginext" \ +// DEFINE: %clang_analyze_cc1 %s \ +// DEFINE: -analyzer-config crosscheck-with-z3=true \ // DEFINE: -analyzer-checker=core // DEFINE: %{attempts} = -analyzer-config crosscheck-with-z3-max-attempts-per-query @@ -14,17 +14,17 @@ // RUN: not %clang_analyze_cc1 %{attempts}=0 2>&1 | FileCheck %s --check-prefix=VERIFY-INVALID // VERIFY-INVALID: invalid input for analyzer-config option 'crosscheck-with-z3-max-attempts-per-query', that expects a positive value -// RUN: Z3_SOLVER_RESULTS="UNDEF" %{mocked_clang} %{attempts}=1 -verify=refuted -// RUN: Z3_SOLVER_RESULTS="UNSAT" %{mocked_clang} %{attempts}=1 -verify=refuted -// RUN: Z3_SOLVER_RESULTS="SAT" %{mocked_clang} %{attempts}=1 -verify=accepted +// RUN: env Z3_SOLVER_RESULTS="UNDEF" %{mocked_clang} %{attempts}=1 -verify=refuted +// RUN: env Z3_SOLVER_RESULTS="UNSAT" %{mocked_clang} %{attempts}=1 -verify=refuted +// RUN: env Z3_SOLVER_RESULTS="SAT" %{mocked_clang} %{attempts}=1 -verify=accepted -// RUN: Z3_SOLVER_RESULTS="UNDEF,UNDEF" %{mocked_clang} %{attempts}=2 -verify=refuted -// RUN: Z3_SOLVER_RESULTS="UNDEF,UNSAT" %{mocked_clang} %{attempts}=2 -verify=refuted -// RUN: Z3_SOLVER_RESULTS="UNDEF,SAT" %{mocked_clang} %{attempts}=2 -verify=accepted +// RUN: env Z3_SOLVER_RESULTS="UNDEF,UNDEF" %{mocked_clang} %{attempts}=2 -verify=refuted +// RUN: env Z3_SOLVER_RESULTS="UNDEF,UNSAT" %{mocked_clang} %{attempts}=2 -verify=refuted +// RUN: env Z3_SOLVER_RESULTS="UNDEF,SAT" %{mocked_clang} %{attempts}=2 -verify=accepted -// RUN: Z3_SOLVER_RESULTS="UNDEF,UNDEF,UNDEF" %{mocked_clang} %{attempts}=3 -verify=refuted -// RUN: Z3_SOLVER_RESULTS="UNDEF,UNDEF,UNSAT" %{mocked_clang} %{attempts}=3 -verify=refuted -// RUN: Z3_SOLVER_RESULTS="UNDEF,UNDEF,SAT" %{mocked_clang} %{attempts}=3 -verify=accepted +// RUN: env Z3_SOLVER_RESULTS="UNDEF,UNDEF,UNDEF" %{mocked_clang} %{attempts}=3 -verify=refuted +// RUN: env Z3_SOLVER_RESULTS="UNDEF,UNDEF,UNSAT" %{mocked_clang} %{attempts}=3 -verify=refuted +// RUN: env Z3_SOLVER_RESULTS="UNDEF,UNDEF,SAT" %{mocked_clang} %{attempts}=3 -verify=accepted // REQUIRES: z3, z3-mock, asserts, shell, system-linux diff --git a/clang/test/Analysis/z3/D83660.c b/clang/test/Analysis/z3/D83660.c index 16ea4ff..a81ce3a 100644 --- a/clang/test/Analysis/z3/D83660.c +++ b/clang/test/Analysis/z3/D83660.c @@ -1,4 +1,4 @@ -// RUN: Z3_SOLVER_RESULTS="SAT,SAT,SAT,SAT,UNDEF" \ +// RUN: env Z3_SOLVER_RESULTS="SAT,SAT,SAT,SAT,UNDEF" \ // RUN: LD_PRELOAD="%llvmshlibdir/MockZ3SolverCheck%pluginext" \ // RUN: %clang_analyze_cc1 -analyzer-constraints=z3 \ // RUN: -analyzer-checker=core %s -verify diff --git a/clang/test/CIR/CodeGen/agg-expr-lvalue.c b/clang/test/CIR/CodeGen/agg-expr-lvalue.c new file mode 100644 index 0000000..c826f8f --- /dev/null +++ b/clang/test/CIR/CodeGen/agg-expr-lvalue.c @@ -0,0 +1,111 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +struct Point { + int x, y; +}; + +struct Line { + struct Point start; + struct Point end; +}; + +// AggExprEmitter::VisitMemberExpr +void test_member_in_array(void) { + struct Line line = {{1, 2}, {3, 4}}; + struct Point arr[1] = {line.start}; +} + +// CIR-LABEL: cir.func{{.*}} @test_member_in_array +// CIR: %[[LINE:.*]] = cir.alloca !rec_Line{{.*}}, ["line", init] +// CIR: %[[ARR:.*]] = cir.alloca !cir.array<!rec_Point x 1>{{.*}}, ["arr", init] +// CIR: %[[MEMBER:.*]] = cir.get_member %[[LINE]][0] {name = "start"} +// CIR: cir.copy + +// LLVM-LABEL: define{{.*}} @test_member_in_array +// LLVM: %[[LINE:.*]] = alloca %struct.Line +// LLVM: %[[ARR:.*]] = alloca [1 x %struct.Point] +// LLVM: %[[MEMBER:.*]] = getelementptr{{.*}}%struct.Line{{.*}}%[[LINE]]{{.*}}i32 0, i32 0 +// LLVM: call void @llvm.memcpy + +// OGCG-LABEL: define{{.*}} @test_member_in_array +// OGCG: %[[LINE:.*]] = alloca %struct.Line +// OGCG: %[[ARR:.*]] = alloca [1 x %struct.Point] +// OGCG: %[[MEMBER:.*]] = getelementptr{{.*}}%struct.Line{{.*}}%[[LINE]]{{.*}}i32 0, i32 0 +// OGCG: call void @llvm.memcpy + +// AggExprEmitter::VisitMemberExpr +void test_member_arrow_in_array(void) { + struct Line *line_ptr; + struct Point arr[1] = {line_ptr->start}; +} + +// CIR-LABEL: cir.func{{.*}} @test_member_arrow_in_array +// CIR: %[[PTR:.*]] = cir.alloca !cir.ptr<!rec_Line>{{.*}}, ["line_ptr"] +// CIR: %[[ARR:.*]] = cir.alloca !cir.array<!rec_Point x 1>{{.*}}, ["arr", init] +// CIR: %[[LOADED:.*]] = cir.load{{.*}}%[[PTR]] +// CIR: %[[MEMBER:.*]] = cir.get_member %[[LOADED]][0] {name = "start"} +// CIR: cir.copy + +// LLVM-LABEL: define{{.*}} @test_member_arrow_in_array +// LLVM: %[[PTR:.*]] = alloca ptr +// LLVM: %[[ARR:.*]] = alloca [1 x %struct.Point] +// LLVM: %[[LOADED:.*]] = load ptr{{.*}}%[[PTR]] +// LLVM: %[[MEMBER:.*]] = getelementptr{{.*}}%struct.Line{{.*}}%[[LOADED]]{{.*}}i32 0, i32 0 +// LLVM: call void @llvm.memcpy + +// OGCG-LABEL: define{{.*}} @test_member_arrow_in_array +// OGCG: %[[PTR:.*]] = alloca ptr +// OGCG: %[[ARR:.*]] = alloca [1 x %struct.Point] +// OGCG: %[[LOADED:.*]] = load ptr{{.*}}%[[PTR]] +// OGCG: %[[MEMBER:.*]] = getelementptr{{.*}}%struct.Line{{.*}}%[[LOADED]]{{.*}}i32 0, i32 0 +// OGCG: call void @llvm.memcpy + +// AggExprEmitter::VisitUnaryDeref +void test_deref_in_array(void) { + struct Point *ptr; + struct Point arr[1] = {*ptr}; +} + +// CIR-LABEL: cir.func{{.*}} @test_deref_in_array +// CIR: %[[PTR:.*]] = cir.alloca !cir.ptr<!rec_Point>{{.*}}, ["ptr"] +// CIR: %[[ARR:.*]] = cir.alloca !cir.array<!rec_Point x 1>{{.*}}, ["arr", init] +// CIR: %[[LOADED:.*]] = cir.load{{.*}}%[[PTR]] +// CIR: cir.copy + +// LLVM-LABEL: define{{.*}} @test_deref_in_array +// LLVM: %[[PTR:.*]] = alloca ptr +// LLVM: %[[ARR:.*]] = alloca [1 x %struct.Point] +// LLVM: %[[LOADED:.*]] = load ptr{{.*}}%[[PTR]] +// LLVM: call void @llvm.memcpy + +// OGCG-LABEL: define{{.*}} @test_deref_in_array +// OGCG: %[[PTR:.*]] = alloca ptr +// OGCG: %[[ARR:.*]] = alloca [1 x %struct.Point] +// OGCG: %[[LOADED:.*]] = load ptr{{.*}}%[[PTR]] +// OGCG: call void @llvm.memcpy + +// AggExprEmitter::VisitStringLiteral +void test_string_array_in_array(void) { + char matrix[2][6] = {"hello", "world"}; +} + +// CIR-LABEL: cir.func{{.*}} @test_string_array_in_array +// CIR: cir.alloca !cir.array<!cir.array<!s8i x 6> x 2>, {{.*}}, ["matrix", init] +// CIR: cir.get_global +// CIR: cir.copy +// CIR: cir.get_global +// CIR: cir.copy + +// LLVM-LABEL: define{{.*}} @test_string_array_in_array +// LLVM: alloca [2 x [6 x i8]] +// LLVM: call void @llvm.memcpy +// LLVM: call void @llvm.memcpy + +// OGCG-LABEL: define{{.*}} @test_string_array_in_array +// OGCG: alloca [2 x [6 x i8]] +// OGCG: call void @llvm.memcpy{{.*}}@__const.test_string_array_in_array diff --git a/clang/test/CIR/CodeGen/complex.cpp b/clang/test/CIR/CodeGen/complex.cpp index 083d438..4e89af4 100644 --- a/clang/test/CIR/CodeGen/complex.cpp +++ b/clang/test/CIR/CodeGen/complex.cpp @@ -1360,6 +1360,30 @@ void complex_type_argument() { // OGCG: %[[TMP_ARG:.*]] = load <2 x float>, ptr %[[ARG_ADDR]], align 4 // OGCG: call void @_Z22complex_type_parameterCf(<2 x float> noundef %[[TMP_ARG]]) +float _Complex complex_type_return_type() { + return { 1.0f, 2.0f }; +} + +// CIR: %[[RET_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["__retval"] +// CIR: %[[RET_VAL:.*]] = cir.const #cir.const_complex<#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00> : !cir.float> : !cir.complex<!cir.float> +// CIR: cir.store{{.*}} %[[RET_VAL]], %[[RET_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>> +// CIR: %[[TMP_RET:.*]] = cir.load %[[RET_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float> +// CIR: cir.return %[[TMP_RET]] : !cir.complex<!cir.float> + +// TODO(CIR): the difference between the CIR LLVM and OGCG is because the lack of calling convention lowering, +// LLVM: %[[RET_ADDR:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM: store { float, float } { float 1.000000e+00, float 2.000000e+00 }, ptr %[[RET_ADDR]], align 4 +// LLVM: %[[TMP_RET:.*]] = load { float, float }, ptr %[[RET_ADDR]], align 4 +// LLVM: ret { float, float } %[[TMP_RET]] + +// OGCG: %[[RET_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG: %[[RET_VAL_REAL:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[RET_ADDR]], i32 0, i32 0 +// OGCG: %[[RET_VAL_IMAG:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[RET_ADDR]], i32 0, i32 1 +// OGCG: store float 1.000000e+00, ptr %[[RET_VAL_REAL]], align 4 +// OGCG: store float 2.000000e+00, ptr %[[RET_VAL_IMAG]], align 4 +// OGCG: %[[TMP_RET:.*]] = load <2 x float>, ptr %[[RET_ADDR]], align 4 +// OGCG: ret <2 x float> %[[TMP_RET]] + void real_on_scalar_bool() { bool a; bool b = __real__ a; @@ -1405,3 +1429,42 @@ void imag_on_scalar_bool() { // OGCG: %[[A_ADDR:.*]] = alloca i8, align 1 // OGCG: %[[B_ADDR:.*]] = alloca i8, align 1 // OGCG: store i8 0, ptr %[[B_ADDR]], align 1 + +void function_with_complex_default_arg( + float _Complex a = __builtin_complex(1.0f, 2.2f)) {} + +// CIR: %[[ARG_0_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a", init] +// CIR: cir.store %{{.*}}, %[[ARG_0_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>> + +// TODO(CIR): the difference between the CIR LLVM and OGCG is because the lack of calling convention lowering, + +// LLVM: %[[ARG_0_ADDR:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM: store { float, float } %{{.*}}, ptr %[[ARG_0_ADDR]], align 4 + +// OGCG: %[[ARG_0_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG: store <2 x float> %{{.*}}, ptr %[[ARG_0_ADDR]], align 4 + +void calling_function_with_default_arg() { + function_with_complex_default_arg(); +} + +// CIR: %[[DEFAULT_ARG_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["coerce"] +// CIR: %[[DEFAULT_ARG_VAL:.*]] = cir.const #cir.const_complex<#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.200000e+00> : !cir.float> : !cir.complex<!cir.float> +// CIR: cir.store{{.*}} %[[DEFAULT_ARG_VAL]], %[[DEFAULT_ARG_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>> +// CIR: %[[TMP_DEFAULT_ARG:.*]] = cir.load{{.*}} %[[DEFAULT_ARG_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float> +// CIR: cir.call @_Z33function_with_complex_default_argCf(%[[TMP_DEFAULT_ARG]]) : (!cir.complex<!cir.float>) -> () + +// TODO(CIR): the difference between the CIR LLVM and OGCG is because the lack of calling convention lowering, + +// LLVM: %[[DEFAULT_ARG_ADDR:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM: store { float, float } { float 1.000000e+00, float 0x40019999A0000000 }, ptr %[[DEFAULT_ARG_ADDR]], align 4 +// LLVM: %[[TMP_DEFAULT_ARG:.*]] = load { float, float }, ptr %[[DEFAULT_ARG_ADDR]], align 4 +// LLVM: call void @_Z33function_with_complex_default_argCf({ float, float } %[[TMP_DEFAULT_ARG]]) + +// OGCG: %[[DEFAULT_ARG_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG: %[[DEFAULT_ARG_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[DEFAULT_ARG_ADDR]], i32 0, i32 0 +// OGCG: %[[DEFAULT_ARG_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[DEFAULT_ARG_ADDR]], i32 0, i32 1 +// OGCG: store float 1.000000e+00, ptr %[[DEFAULT_ARG_REAL_PTR]], align 4 +// OGCG: store float 0x40019999A0000000, ptr %[[DEFAULT_ARG_IMAG_PTR]], align 4 +// OGCG: %[[TMP_DEFAULT_ARG:.*]] = load <2 x float>, ptr %[[DEFAULT_ARG_ADDR]], align 4 +// OGCG: call void @_Z33function_with_complex_default_argCf(<2 x float> {{.*}} %[[TMP_DEFAULT_ARG]]) diff --git a/clang/test/CIR/CodeGen/compound_literal.cpp b/clang/test/CIR/CodeGen/compound_literal.cpp index a92af95..30a1dc0 100644 --- a/clang/test/CIR/CodeGen/compound_literal.cpp +++ b/clang/test/CIR/CodeGen/compound_literal.cpp @@ -97,3 +97,30 @@ void foo3() { // OGCG: %[[TMP:.*]] = load <4 x i32>, ptr %[[CL_ADDR]], align 16 // OGCG: store <4 x i32> %[[TMP]], ptr %[[A_ADDR]], align 16 +struct Point { + int x, y; +}; + +void foo4() { + Point p = (Point){5, 10}; +} + +// CIR-LABEL: @_Z4foo4v +// CIR: %[[P:.*]] = cir.alloca !rec_Point, !cir.ptr<!rec_Point>, ["p", init] +// CIR: %[[P_X:.*]] = cir.get_member %[[P]][0] {name = "x"} +// CIR: %[[FIVE:.*]] = cir.const #cir.int<5> : !s32i +// CIR: cir.store{{.*}} %[[FIVE]], %[[P_X]] +// CIR: %[[P_Y:.*]] = cir.get_member %[[P]][1] {name = "y"} +// CIR: %[[TEN:.*]] = cir.const #cir.int<10> : !s32i +// CIR: cir.store{{.*}} %[[TEN]], %[[P_Y]] + +// LLVM-LABEL: @_Z4foo4v +// LLVM: %[[P:.*]] = alloca %struct.Point +// LLVM: %[[P_X:.*]] = getelementptr %struct.Point, ptr %[[P]], i32 0, i32 0 +// LLVM: store i32 5, ptr %[[P_X]] +// LLVM: %[[P_Y:.*]] = getelementptr %struct.Point, ptr %[[P]], i32 0, i32 1 +// LLVM: store i32 10, ptr %[[P_Y]] + +// OGCG-LABEL: @_Z4foo4v +// OGCG: %[[P:.*]] = alloca %struct.Point +// OGCG: call void @llvm.memcpy{{.*}}(ptr{{.*}} %[[P]], ptr{{.*}} @__const._Z4foo4v.p diff --git a/clang/test/CIR/CodeGen/opaque.cpp b/clang/test/CIR/CodeGen/opaque.cpp index 028bfd9..eac0dfa 100644 --- a/clang/test/CIR/CodeGen/opaque.cpp +++ b/clang/test/CIR/CodeGen/opaque.cpp @@ -154,3 +154,166 @@ void foo3() { // OGCG: [[COND_END]]: // OGCG: %[[RESULT:.*]] = phi i32 [ %[[TMP_A]], %[[COND_TRUE]] ], [ %[[TMP_B]], %[[COND_FALSE]] ] // OGCG: store i32 %[[RESULT]], ptr %[[C_ADDR]], align 4 + +void test_gnu_binary_lvalue_assign() { + int a = 5; + int b = 10; + (a ?: b) = 42; +} + +// CIR-LABEL: cir.func{{.*}} @_Z29test_gnu_binary_lvalue_assignv( +// CIR: %[[A:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] +// CIR: %[[B:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] +// CIR: %[[A_VAL:.*]] = cir.load{{.*}} %[[A]] : !cir.ptr<!s32i>, !s32i +// CIR: %[[A_BOOL:.*]] = cir.cast int_to_bool %[[A_VAL]] : !s32i -> !cir.bool +// CIR: %[[TERNARY_PTR:.*]] = cir.ternary(%[[A_BOOL]], true { +// CIR: cir.yield %[[A]] : !cir.ptr<!s32i> +// CIR: }, false { +// CIR: cir.yield %[[B]] : !cir.ptr<!s32i> +// CIR: }) : (!cir.bool) -> !cir.ptr<!s32i> +// CIR: cir.store{{.*}} %{{.*}}, %[[TERNARY_PTR]] : !s32i, !cir.ptr<!s32i> + +// LLVM-LABEL: define{{.*}} void @_Z29test_gnu_binary_lvalue_assignv( +// LLVM: %[[A:.*]] = alloca i32 +// LLVM: %[[B:.*]] = alloca i32 +// LLVM: %[[A_VAL:.*]] = load i32, ptr %[[A]] +// LLVM: %[[COND:.*]] = icmp ne i32 %[[A_VAL]], 0 +// LLVM: br i1 %[[COND]], label %[[TRUE_BB:.*]], label %[[FALSE_BB:.*]] +// LLVM: [[TRUE_BB]]: +// LLVM: br label %[[MERGE_BB:.*]] +// LLVM: [[FALSE_BB]]: +// LLVM: br label %[[MERGE_BB]] +// LLVM: [[MERGE_BB]]: +// LLVM: %[[PHI_PTR:.*]] = phi ptr [ %[[B]], %[[FALSE_BB]] ], [ %[[A]], %[[TRUE_BB]] ] +// LLVM: br label %[[CONT_BB:.*]] +// LLVM: [[CONT_BB]]: +// LLVM: store i32 42, ptr %[[PHI_PTR]] + +// OGCG-LABEL: define{{.*}} void @_Z29test_gnu_binary_lvalue_assignv( +// OGCG: %[[A:.*]] = alloca i32 +// OGCG: %[[B:.*]] = alloca i32 +// OGCG: %[[A_VAL:.*]] = load i32, ptr %[[A]] +// OGCG: %[[COND:.*]] = icmp ne i32 %[[A_VAL]], 0 +// OGCG: br i1 %[[COND]], label %[[TRUE_BB:.*]], label %[[FALSE_BB:.*]] +// OGCG: [[TRUE_BB]]: +// OGCG: br label %[[MERGE_BB:.*]] +// OGCG: [[FALSE_BB]]: +// OGCG: br label %[[MERGE_BB]] +// OGCG: [[MERGE_BB]]: +// OGCG: %[[PHI_PTR:.*]] = phi ptr [ %[[A]], %[[TRUE_BB]] ], [ %[[B]], %[[FALSE_BB]] ] +// OGCG: store i32 42, ptr %[[PHI_PTR]] + +void test_gnu_binary_lvalue_compound() { + int a = 7; + int b = 14; + (a ?: b) += 5; +} + +// CIR-LABEL: cir.func{{.*}} @_Z31test_gnu_binary_lvalue_compoundv( +// CIR: %[[A:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] +// CIR: %[[B:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] +// CIR: %[[A_VAL:.*]] = cir.load{{.*}} %[[A]] : !cir.ptr<!s32i>, !s32i +// CIR: %[[A_BOOL:.*]] = cir.cast int_to_bool %[[A_VAL]] : !s32i -> !cir.bool +// CIR: %[[LVAL_PTR:.*]] = cir.ternary(%[[A_BOOL]], true { +// CIR: cir.yield %[[A]] : !cir.ptr<!s32i> +// CIR: }, false { +// CIR: cir.yield %[[B]] : !cir.ptr<!s32i> +// CIR: }) : (!cir.bool) -> !cir.ptr<!s32i> +// CIR: %[[OLD_VAL:.*]] = cir.load{{.*}} %[[LVAL_PTR]] +// CIR: %[[NEW_VAL:.*]] = cir.binop(add, %[[OLD_VAL]], %{{.*}}) +// CIR: cir.store{{.*}} %[[NEW_VAL]], %[[LVAL_PTR]] + +// LLVM-LABEL: define{{.*}} void @_Z31test_gnu_binary_lvalue_compoundv( +// LLVM: %[[A:.*]] = alloca i32 +// LLVM: %[[B:.*]] = alloca i32 +// LLVM: %[[A_VAL:.*]] = load i32, ptr %[[A]] +// LLVM: %[[COND:.*]] = icmp ne i32 %[[A_VAL]], 0 +// LLVM: br i1 %[[COND]], label %[[TRUE_BB:.*]], label %[[FALSE_BB:.*]] +// LLVM: [[TRUE_BB]]: +// LLVM: br label %[[MERGE_BB:.*]] +// LLVM: [[FALSE_BB]]: +// LLVM: br label %[[MERGE_BB]] +// LLVM: [[MERGE_BB]]: +// LLVM: %[[PTR:.*]] = phi ptr [ %[[B]], %[[FALSE_BB]] ], [ %[[A]], %[[TRUE_BB]] ] +// LLVM: br label %[[CONT:.*]] +// LLVM: [[CONT]]: +// LLVM: %[[OLD:.*]] = load i32, ptr %[[PTR]] +// LLVM: %[[NEW:.*]] = add{{.*}} i32 %[[OLD]], 5 +// LLVM: store i32 %[[NEW]], ptr %[[PTR]] + +// OGCG-LABEL: define{{.*}} void @_Z31test_gnu_binary_lvalue_compoundv( +// OGCG: %[[A:.*]] = alloca i32 +// OGCG: %[[B:.*]] = alloca i32 +// OGCG: %[[A_VAL:.*]] = load i32, ptr %[[A]] +// OGCG: %[[COND:.*]] = icmp ne i32 %[[A_VAL]], 0 +// OGCG: br i1 %[[COND]], label %[[TRUE_BB:.*]], label %[[FALSE_BB:.*]] +// OGCG: [[TRUE_BB]]: +// OGCG: br label %[[MERGE_BB:.*]] +// OGCG: [[FALSE_BB]]: +// OGCG: br label %[[MERGE_BB]] +// OGCG: [[MERGE_BB]]: +// OGCG: %[[PTR:.*]] = phi ptr [ %[[A]], %[[TRUE_BB]] ], [ %[[B]], %[[FALSE_BB]] ] +// OGCG: %[[OLD:.*]] = load i32, ptr %[[PTR]] +// OGCG: %[[NEW:.*]] = add{{.*}} i32 %[[OLD]], 5 +// OGCG: store i32 %[[NEW]], ptr %[[PTR]] + +void test_gnu_binary_lvalue_ptr() { + int x = 1, y = 2; + int *p = &x; + int *q = nullptr; + *(p ?: q) = 99; +} + +// CIR-LABEL: cir.func{{.*}} @_Z26test_gnu_binary_lvalue_ptrv( +// CIR: %[[X:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] +// CIR: %[[Y:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] +// CIR: %[[P:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["p", init] +// CIR: %[[Q:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["q", init] +// CIR: %[[P_VAL:.*]] = cir.load{{.*}} %[[P]] +// CIR: %[[P_BOOL:.*]] = cir.cast ptr_to_bool %[[P_VAL]] +// CIR: %[[PTR_RESULT:.*]] = cir.ternary(%[[P_BOOL]], true { +// CIR: %[[P_LOAD:.*]] = cir.load{{.*}} %[[P]] +// CIR: cir.yield %[[P_LOAD]] : !cir.ptr<!s32i> +// CIR: }, false { +// CIR: %[[Q_LOAD:.*]] = cir.load{{.*}} %[[Q]] +// CIR: cir.yield %[[Q_LOAD]] : !cir.ptr<!s32i> +// CIR: }) : (!cir.bool) -> !cir.ptr<!s32i> +// CIR: cir.store{{.*}} %{{.*}}, %[[PTR_RESULT]] + +// LLVM-LABEL: define{{.*}} void @_Z26test_gnu_binary_lvalue_ptrv( +// LLVM: %[[X:.*]] = alloca i32 +// LLVM: %[[Y:.*]] = alloca i32 +// LLVM: %[[P:.*]] = alloca ptr +// LLVM: %[[Q:.*]] = alloca ptr +// LLVM: %[[P_VAL:.*]] = load ptr, ptr %[[P]] +// LLVM: %[[COND:.*]] = icmp ne ptr %[[P_VAL]], null +// LLVM: br i1 %[[COND]], label %[[TRUE_BB:.*]], label %[[FALSE_BB:.*]] +// LLVM: [[TRUE_BB]]: +// LLVM: %[[P_LOAD:.*]] = load ptr, ptr %[[P]] +// LLVM: br label %[[MERGE_BB:.*]] +// LLVM: [[FALSE_BB]]: +// LLVM: %[[Q_LOAD:.*]] = load ptr, ptr %[[Q]] +// LLVM: br label %[[MERGE_BB]] +// LLVM: [[MERGE_BB]]: +// LLVM: %[[PHI:.*]] = phi ptr [ %[[Q_LOAD]], %[[FALSE_BB]] ], [ %[[P_LOAD]], %[[TRUE_BB]] ] +// LLVM: br label %[[CONT:.*]] +// LLVM: [[CONT]]: +// LLVM: store i32 99, ptr %[[PHI]] + +// OGCG-LABEL: define{{.*}} void @_Z26test_gnu_binary_lvalue_ptrv( +// OGCG: %[[X:.*]] = alloca i32 +// OGCG: %[[Y:.*]] = alloca i32 +// OGCG: %[[P:.*]] = alloca ptr +// OGCG: %[[Q:.*]] = alloca ptr +// OGCG: %[[P_VAL:.*]] = load ptr, ptr %[[P]] +// OGCG: %[[COND:.*]] = icmp ne ptr %[[P_VAL]], null +// OGCG: br i1 %[[COND]], label %[[TRUE_BB:.*]], label %[[FALSE_BB:.*]] +// OGCG: [[TRUE_BB]]: +// OGCG: %[[P_LOAD:.*]] = load ptr, ptr %[[P]] +// OGCG: br label %[[MERGE_BB:.*]] +// OGCG: [[FALSE_BB]]: +// OGCG: %[[Q_LOAD:.*]] = load ptr, ptr %[[Q]] +// OGCG: br label %[[MERGE_BB]] +// OGCG: [[MERGE_BB]]: +// OGCG: %[[PHI:.*]] = phi ptr [ %[[P_LOAD]], %[[TRUE_BB]] ], [ %[[Q_LOAD]], %[[FALSE_BB]] ] +// OGCG: store i32 99, ptr %[[PHI]] diff --git a/clang/test/CIR/CodeGen/ternary-throw.cpp b/clang/test/CIR/CodeGen/ternary-throw.cpp new file mode 100644 index 0000000..fb8897f --- /dev/null +++ b/clang/test/CIR/CodeGen/ternary-throw.cpp @@ -0,0 +1,197 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fexceptions -fcxx-exceptions -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fexceptions -fcxx-exceptions -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fexceptions -fcxx-exceptions -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG + +const int& test_cond_throw_false(bool flag) { + const int a = 10; + return flag ? a : throw 0; +} + +// CIR-LABEL: cir.func{{.*}} @_Z21test_cond_throw_falseb( +// CIR: %[[FLAG:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["flag", init] +// CIR: %[[A:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init, const] +// CIR: %[[TEN:.*]] = cir.const #cir.int<10> : !s32i +// CIR: cir.store{{.*}} %[[TEN]], %[[A]] : !s32i, !cir.ptr<!s32i> +// CIR: %[[FLAG_VAL:.*]] = cir.load{{.*}} %[[FLAG]] : !cir.ptr<!cir.bool>, !cir.bool +// CIR: %[[RESULT:.*]] = cir.ternary(%[[FLAG_VAL]], true { +// CIR: cir.yield %[[A]] : !cir.ptr<!s32i> +// CIR: }, false { +// CIR: %[[EXCEPTION:.*]] = cir.alloc.exception{{.*}} -> !cir.ptr<!s32i> +// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i +// CIR: cir.store{{.*}} %[[ZERO]], %[[EXCEPTION]] : !s32i, !cir.ptr<!s32i> +// CIR: cir.throw %[[EXCEPTION]] : !cir.ptr<!s32i>, @_ZTIi +// CIR: cir.unreachable +// CIR: }) : (!cir.bool) -> !cir.ptr<!s32i> + +// LLVM-LABEL: define{{.*}} ptr @_Z21test_cond_throw_falseb( +// LLVM: %[[FLAG_ALLOCA:.*]] = alloca i8 +// LLVM: %[[RET_ALLOCA:.*]] = alloca ptr +// LLVM: %[[A_ALLOCA:.*]] = alloca i32 +// LLVM: %[[ZEXT:.*]] = zext i1 %{{.*}} to i8 +// LLVM: store i8 %[[ZEXT]], ptr %[[FLAG_ALLOCA]] +// LLVM: store i32 10, ptr %[[A_ALLOCA]] +// LLVM: %[[LOAD:.*]] = load i8, ptr %[[FLAG_ALLOCA]] +// LLVM: %[[BOOL:.*]] = trunc i8 %[[LOAD]] to i1 +// LLVM: br i1 %[[BOOL]], label %[[TRUE_BB:.*]], label %[[FALSE_BB:.*]] +// LLVM: [[TRUE_BB]]: +// LLVM: br label %[[PHI_BB:.*]] +// LLVM: [[FALSE_BB]]: +// LLVM: %[[EXC:.*]] = call{{.*}} ptr @__cxa_allocate_exception +// LLVM: store i32 0, ptr %[[EXC]] +// LLVM: call void @__cxa_throw(ptr %[[EXC]], ptr @_ZTIi +// LLVM: unreachable +// LLVM: [[PHI_BB]]: +// LLVM: %[[PHI:.*]] = phi ptr [ %[[A_ALLOCA]], %[[TRUE_BB]] ] +// LLVM: br label %[[CONT_BB:.*]] +// LLVM: [[CONT_BB]]: +// LLVM: store ptr %[[A_ALLOCA]], ptr %[[RET_ALLOCA]] +// LLVM: %[[RET:.*]] = load ptr, ptr %[[RET_ALLOCA]] +// LLVM: ret ptr %[[RET]] + +// OGCG-LABEL: define{{.*}} ptr @_Z21test_cond_throw_falseb( +// OGCG: %{{.*}} = alloca i8 +// OGCG: %[[A:.*]] = alloca i32 +// OGCG: store i32 10, ptr %[[A]] +// OGCG: %{{.*}} = load i8, ptr %{{.*}} +// OGCG: %[[BOOL:.*]] = trunc i8 %{{.*}} to i1 +// OGCG: br i1 %[[BOOL]], label %[[TRUE_BB:.*]], label %[[FALSE_BB:.*]] +// OGCG: [[TRUE_BB]]: +// OGCG: br label %[[END:.*]] +// OGCG: [[FALSE_BB]]: +// OGCG: %{{.*}} = call{{.*}} ptr @__cxa_allocate_exception +// OGCG: store i32 0, ptr %{{.*}} +// OGCG: call void @__cxa_throw(ptr %{{.*}}, ptr @_ZTIi +// OGCG: unreachable +// OGCG: [[END]]: +// OGCG: ret ptr %[[A]] + +const int& test_cond_throw_true(bool flag) { + const int a = 10; + return flag ? throw 0 : a; +} + +// CIR-LABEL: cir.func{{.*}} @_Z20test_cond_throw_trueb( +// CIR: %[[FLAG:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["flag", init] +// CIR: %[[A:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init, const] +// CIR: %[[TEN:.*]] = cir.const #cir.int<10> : !s32i +// CIR: cir.store{{.*}} %[[TEN]], %[[A]] : !s32i, !cir.ptr<!s32i> +// CIR: %[[FLAG_VAL:.*]] = cir.load{{.*}} %[[FLAG]] : !cir.ptr<!cir.bool>, !cir.bool +// CIR: %[[RESULT:.*]] = cir.ternary(%[[FLAG_VAL]], true { +// CIR: %[[EXCEPTION:.*]] = cir.alloc.exception{{.*}} -> !cir.ptr<!s32i> +// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i +// CIR: cir.store{{.*}} %[[ZERO]], %[[EXCEPTION]] : !s32i, !cir.ptr<!s32i> +// CIR: cir.throw %[[EXCEPTION]] : !cir.ptr<!s32i>, @_ZTIi +// CIR: cir.unreachable +// CIR: }, false { +// CIR: cir.yield %[[A]] : !cir.ptr<!s32i> +// CIR: }) : (!cir.bool) -> !cir.ptr<!s32i> + +// LLVM-LABEL: define{{.*}} ptr @_Z20test_cond_throw_trueb( +// LLVM: %[[FLAG_ALLOCA:.*]] = alloca i8 +// LLVM: %[[RET_ALLOCA:.*]] = alloca ptr +// LLVM: %[[A_ALLOCA:.*]] = alloca i32 +// LLVM: %[[ZEXT:.*]] = zext i1 %{{.*}} to i8 +// LLVM: store i8 %[[ZEXT]], ptr %[[FLAG_ALLOCA]] +// LLVM: store i32 10, ptr %[[A_ALLOCA]] +// LLVM: %[[LOAD:.*]] = load i8, ptr %[[FLAG_ALLOCA]] +// LLVM: %[[BOOL:.*]] = trunc i8 %[[LOAD]] to i1 +// LLVM: br i1 %[[BOOL]], label %[[TRUE_BB:.*]], label %[[FALSE_BB:.*]] +// LLVM: [[TRUE_BB]]: +// LLVM: %[[EXC:.*]] = call{{.*}} ptr @__cxa_allocate_exception +// LLVM: store i32 0, ptr %[[EXC]] +// LLVM: call void @__cxa_throw(ptr %[[EXC]], ptr @_ZTIi +// LLVM: unreachable +// LLVM: [[FALSE_BB]]: +// LLVM: br label %[[PHI_BB:.*]] +// LLVM: [[PHI_BB]]: +// LLVM: %[[PHI:.*]] = phi ptr [ %[[A_ALLOCA]], %[[FALSE_BB]] ] +// LLVM: br label %[[CONT_BB:.*]] +// LLVM: [[CONT_BB]]: +// LLVM: store ptr %[[A_ALLOCA]], ptr %[[RET_ALLOCA]] +// LLVM: %[[RET:.*]] = load ptr, ptr %[[RET_ALLOCA]] +// LLVM: ret ptr %[[RET]] + +// OGCG-LABEL: define{{.*}} ptr @_Z20test_cond_throw_trueb( +// OGCG: %{{.*}} = alloca i8 +// OGCG: %[[A:.*]] = alloca i32 +// OGCG: store i32 10, ptr %[[A]] +// OGCG: %{{.*}} = load i8, ptr %{{.*}} +// OGCG: %[[BOOL:.*]] = trunc i8 %{{.*}} to i1 +// OGCG: br i1 %[[BOOL]], label %[[TRUE_BB:.*]], label %[[FALSE_BB:.*]] +// OGCG: [[TRUE_BB]]: +// OGCG: %{{.*}} = call{{.*}} ptr @__cxa_allocate_exception +// OGCG: store i32 0, ptr %{{.*}} +// OGCG: call void @__cxa_throw(ptr %{{.*}}, ptr @_ZTIi +// OGCG: unreachable +// OGCG: [[FALSE_BB]]: +// OGCG: br label %[[END:.*]] +// OGCG: [[END]]: +// OGCG: ret ptr %[[A]] + +// Test constant folding with throw - compile-time true condition, dead throw in false branch +const int& test_cond_const_true_throw_false() { + const int a = 20; + return true ? a : throw 0; +} + +// CIR-LABEL: cir.func{{.*}} @_Z32test_cond_const_true_throw_falsev( +// CIR: %[[A:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init, const] +// CIR: %[[TWENTY:.*]] = cir.const #cir.int<20> : !s32i +// CIR: cir.store{{.*}} %[[TWENTY]], %[[A]] : !s32i, !cir.ptr<!s32i> +// CIR-NOT: cir.ternary +// CIR-NOT: cir.throw +// CIR: cir.store %[[A]] +// CIR: %[[RET:.*]] = cir.load +// CIR: cir.return %[[RET]] : !cir.ptr<!s32i> + +// LLVM-LABEL: define{{.*}} ptr @_Z32test_cond_const_true_throw_falsev( +// LLVM: %[[A:.*]] = alloca i32 +// LLVM: store i32 20, ptr %[[A]] +// LLVM-NOT: br i1 +// LLVM-NOT: __cxa_throw +// LLVM: store ptr %[[A]] +// LLVM: %[[RET:.*]] = load ptr +// LLVM: ret ptr %[[RET]] + +// OGCG-LABEL: define{{.*}} ptr @_Z32test_cond_const_true_throw_falsev( +// OGCG: %[[A:.*]] = alloca i32 +// OGCG: store i32 20, ptr %[[A]] +// OGCG-NOT: br i1 +// OGCG-NOT: __cxa_throw +// OGCG: ret ptr %[[A]] + +// Test constant folding with throw - compile-time false condition, dead throw in true branch +const int& test_cond_const_false_throw_true() { + const int a = 30; + return false ? throw 0 : a; +} + +// CIR-LABEL: cir.func{{.*}} @_Z32test_cond_const_false_throw_truev( +// CIR: %[[A:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init, const] +// CIR: %[[THIRTY:.*]] = cir.const #cir.int<30> : !s32i +// CIR: cir.store{{.*}} %[[THIRTY]], %[[A]] : !s32i, !cir.ptr<!s32i> +// CIR-NOT: cir.ternary +// CIR-NOT: cir.throw +// CIR: cir.store %[[A]] +// CIR: %[[RET:.*]] = cir.load +// CIR: cir.return %[[RET]] : !cir.ptr<!s32i> + +// LLVM-LABEL: define{{.*}} ptr @_Z32test_cond_const_false_throw_truev( +// LLVM: %[[A:.*]] = alloca i32 +// LLVM: store i32 30, ptr %[[A]] +// LLVM-NOT: br i1 +// LLVM-NOT: __cxa_throw +// LLVM: store ptr %[[A]] +// LLVM: %[[RET:.*]] = load ptr +// LLVM: ret ptr %[[RET]] + +// OGCG-LABEL: define{{.*}} ptr @_Z32test_cond_const_false_throw_truev( +// OGCG: %[[A:.*]] = alloca i32 +// OGCG: store i32 30, ptr %[[A]] +// OGCG-NOT: br i1 +// OGCG-NOT: __cxa_throw +// OGCG: ret ptr %[[A]] + diff --git a/clang/test/CIR/CodeGen/ternary.cpp b/clang/test/CIR/CodeGen/ternary.cpp index e7b7270..847c0b4 100644 --- a/clang/test/CIR/CodeGen/ternary.cpp +++ b/clang/test/CIR/CodeGen/ternary.cpp @@ -10,11 +10,11 @@ int x(int y) { } // CIR-LABEL: cir.func{{.*}} @_Z1xi( -// CIR-SAME: %[[ARG0:.*]]: !s32i {{.*}}) -> !s32i -// CIR: [[Y:%.+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] {alignment = 4 : i64} -// CIR: [[RETVAL:%.+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64} +// CIR-SAME: %[[ARG0:.*]]: !s32i {{.*}}) -> !s32i{{.*}}{ +// CIR: [[Y:%.+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] +// CIR: [[RETVAL:%.+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] // CIR: cir.store %[[ARG0]], [[Y]] : !s32i, !cir.ptr<!s32i> -// CIR: [[YVAL:%.+]] = cir.load align(4) [[Y]] : !cir.ptr<!s32i>, !s32i +// CIR: [[YVAL:%.+]] = cir.load{{.*}} [[Y]] : !cir.ptr<!s32i>, !s32i // CIR: [[ZERO:%.+]] = cir.const #cir.int<0> : !s32i // CIR: [[CMP:%.+]] = cir.cmp(gt, [[YVAL]], [[ZERO]]) : !s32i, !cir.bool // CIR: [[THREE:%.+]] = cir.const #cir.int<3> : !s32i @@ -52,21 +52,21 @@ int foo(int a, int b) { } // CIR-LABEL: cir.func{{.*}} @_Z3fooii( -// CIR-SAME: %[[ARG0:.*]]: !s32i {{.*}}, %[[ARG1:.*]]: !s32i {{.*}}) -> !s32i -// CIR: [[A:%.+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64} -// CIR: [[B:%.+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] {alignment = 4 : i64} -// CIR: [[RETVAL:%.+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64} +// CIR-SAME: %[[ARG0:.*]]: !s32i {{.*}}, %[[ARG1:.*]]: !s32i {{.*}}) -> !s32i{{.*}}{ +// CIR: [[A:%.+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] +// CIR: [[B:%.+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] +// CIR: [[RETVAL:%.+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] // CIR: cir.store %[[ARG0]], [[A]] : !s32i, !cir.ptr<!s32i> // CIR: cir.store %[[ARG1]], [[B]] : !s32i, !cir.ptr<!s32i> // CIR: cir.scope { -// CIR: [[ALOAD:%.+]] = cir.load align(4) [[A]] : !cir.ptr<!s32i>, !s32i -// CIR: [[BLOAD:%.+]] = cir.load align(4) [[B]] : !cir.ptr<!s32i>, !s32i +// CIR: [[ALOAD:%.+]] = cir.load{{.*}} [[A]] : !cir.ptr<!s32i>, !s32i +// CIR: [[BLOAD:%.+]] = cir.load{{.*}} [[B]] : !cir.ptr<!s32i>, !s32i // CIR: [[CMP:%.+]] = cir.cmp(lt, [[ALOAD]], [[BLOAD]]) : !s32i, !cir.bool // CIR: [[TERNARY_RES:%.+]] = cir.ternary([[CMP]], true { // CIR: [[ZERO:%.+]] = cir.const #cir.int<0> : !s32i // CIR: cir.yield [[ZERO]] : !s32i // CIR: }, false { -// CIR: [[ALOAD2:%.+]] = cir.load align(4) [[A]] : !cir.ptr<!s32i>, !s32i +// CIR: [[ALOAD2:%.+]] = cir.load{{.*}} [[A]] : !cir.ptr<!s32i>, !s32i // CIR: cir.yield [[ALOAD2]] : !s32i // CIR: }) : (!cir.bool) -> !s32i // CIR: [[CAST:%.+]] = cir.cast int_to_bool [[TERNARY_RES]] : !s32i -> !cir.bool @@ -145,3 +145,214 @@ int foo(int a, int b) { // OGCG: [[RETURN]]: // OGCG: %[[RET2:.*]] = load i32, ptr %[[RETVAL]] // OGCG: ret i32 %[[RET2]] + +void test_cond_lvalue_assign(bool flag) { + int a = 1; + int b = 2; + (flag ? a : b) = 42; +} + +// CIR-LABEL: cir.func{{.*}} @_Z23test_cond_lvalue_assignb( +// CIR: %[[FLAG:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["flag", init] +// CIR: %[[A:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] +// CIR: %[[B:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] +// CIR: %[[FLAG_VAL:.*]] = cir.load{{.*}} %[[FLAG]] +// CIR: %[[TERNARY_PTR:.*]] = cir.ternary(%[[FLAG_VAL]], true { +// CIR: cir.yield %[[A]] : !cir.ptr<!s32i> +// CIR: }, false { +// CIR: cir.yield %[[B]] : !cir.ptr<!s32i> +// CIR: }) : (!cir.bool) -> !cir.ptr<!s32i> +// CIR: cir.store{{.*}} %{{.*}}, %[[TERNARY_PTR]] : !s32i, !cir.ptr<!s32i> + +// LLVM-LABEL: define{{.*}} void @_Z23test_cond_lvalue_assignb( +// LLVM: %[[FLAG:.*]] = alloca i8 +// LLVM: %[[A:.*]] = alloca i32 +// LLVM: %[[B:.*]] = alloca i32 +// LLVM: %[[FLAG_VAL:.*]] = load i8, ptr %[[FLAG]] +// LLVM: %[[COND:.*]] = trunc i8 %[[FLAG_VAL]] to i1 +// LLVM: br i1 %[[COND]], label %[[TRUE_BB:.*]], label %[[FALSE_BB:.*]] +// LLVM: [[TRUE_BB]]: +// LLVM: br label %[[MERGE_BB:.*]] +// LLVM: [[FALSE_BB]]: +// LLVM: br label %[[MERGE_BB]] +// LLVM: [[MERGE_BB]]: +// LLVM: %[[PHI_PTR:.*]] = phi ptr [ %[[B]], %[[FALSE_BB]] ], [ %[[A]], %[[TRUE_BB]] ] +// LLVM: br label %[[CONT_BB:.*]] +// LLVM: [[CONT_BB]]: +// LLVM: store i32 42, ptr %[[PHI_PTR]] + +// OGCG-LABEL: define{{.*}} void @_Z23test_cond_lvalue_assignb( +// OGCG: %[[FLAG:.*]] = alloca i8 +// OGCG: %[[A:.*]] = alloca i32 +// OGCG: %[[B:.*]] = alloca i32 +// OGCG: %[[FLAG_VAL:.*]] = load i8, ptr %[[FLAG]] +// OGCG: %[[COND:.*]] = trunc i8 %[[FLAG_VAL]] to i1 +// OGCG: br i1 %[[COND]], label %[[TRUE_BB:.*]], label %[[FALSE_BB:.*]] +// OGCG: [[TRUE_BB]]: +// OGCG: br label %[[MERGE_BB:.*]] +// OGCG: [[FALSE_BB]]: +// OGCG: br label %[[MERGE_BB]] +// OGCG: [[MERGE_BB]]: +// OGCG: %[[PHI_PTR:.*]] = phi ptr [ %[[A]], %[[TRUE_BB]] ], [ %[[B]], %[[FALSE_BB]] ] +// OGCG: store i32 42, ptr %[[PHI_PTR]] + +int& test_cond_lvalue_ref(bool cond, int x, int y) { + return cond ? x : y; +} + +// CIR-LABEL: cir.func{{.*}} @_Z20test_cond_lvalue_refbii( +// CIR: %[[COND:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["cond", init] +// CIR: %[[X:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] +// CIR: %[[Y:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] +// CIR: %[[COND_VAL:.*]] = cir.load{{.*}} %[[COND]] +// CIR: %[[REF_PTR:.*]] = cir.ternary(%[[COND_VAL]], true { +// CIR: cir.yield %[[X]] : !cir.ptr<!s32i> +// CIR: }, false { +// CIR: cir.yield %[[Y]] : !cir.ptr<!s32i> +// CIR: }) : (!cir.bool) -> !cir.ptr<!s32i> +// CIR: cir.store %[[REF_PTR]] +// CIR: %[[RET_VAL:.*]] = cir.load +// CIR: cir.return %[[RET_VAL]] : !cir.ptr<!s32i> + +// LLVM-LABEL: define{{.*}} ptr @_Z20test_cond_lvalue_refbii( +// LLVM: %[[COND:.*]] = alloca i8 +// LLVM: %[[X:.*]] = alloca i32 +// LLVM: %[[Y:.*]] = alloca i32 +// LLVM: %[[COND_VAL:.*]] = load i8, ptr %[[COND]] +// LLVM: %[[BOOL:.*]] = trunc i8 %[[COND_VAL]] to i1 +// LLVM: br i1 %[[BOOL]], label %[[TRUE_BB:.*]], label %[[FALSE_BB:.*]] +// LLVM: [[TRUE_BB]]: +// LLVM: br label %[[MERGE_BB:.*]] +// LLVM: [[FALSE_BB]]: +// LLVM: br label %[[MERGE_BB]] +// LLVM: [[MERGE_BB]]: +// LLVM: %[[PHI:.*]] = phi ptr [ %[[Y]], %[[FALSE_BB]] ], [ %[[X]], %[[TRUE_BB]] ] +// LLVM: br label %[[CONT:.*]] +// LLVM: [[CONT]]: +// LLVM: store ptr %[[PHI]] +// LLVM: %[[RET:.*]] = load ptr +// LLVM: ret ptr %[[RET]] + +// OGCG-LABEL: define{{.*}} ptr @_Z20test_cond_lvalue_refbii( +// OGCG: %[[COND:.*]] = alloca i8 +// OGCG: %[[X:.*]] = alloca i32 +// OGCG: %[[Y:.*]] = alloca i32 +// OGCG: %[[COND_VAL:.*]] = load i8, ptr %[[COND]] +// OGCG: %[[BOOL:.*]] = trunc i8 %[[COND_VAL]] to i1 +// OGCG: br i1 %[[BOOL]], label %[[TRUE_BB:.*]], label %[[FALSE_BB:.*]] +// OGCG: [[TRUE_BB]]: +// OGCG: br label %[[MERGE_BB:.*]] +// OGCG: [[FALSE_BB]]: +// OGCG: br label %[[MERGE_BB]] +// OGCG: [[MERGE_BB]]: +// OGCG: %[[PHI:.*]] = phi ptr [ %[[X]], %[[TRUE_BB]] ], [ %[[Y]], %[[FALSE_BB]] ] +// OGCG: ret ptr %[[PHI]] + +// Test ConditionalOperator as lvalue - compound assignment +void test_cond_lvalue_compound(bool flag) { + int a = 5; + int b = 10; + (flag ? a : b) += 3; +} + +// CIR-LABEL: cir.func{{.*}} @_Z25test_cond_lvalue_compoundb( +// CIR: %[[FLAG:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["flag", init] +// CIR: %[[A:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] +// CIR: %[[B:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] +// CIR: %[[FLAG_VAL:.*]] = cir.load{{.*}} %[[FLAG]] +// CIR: %[[LVAL_PTR:.*]] = cir.ternary(%[[FLAG_VAL]], true { +// CIR: cir.yield %[[A]] : !cir.ptr<!s32i> +// CIR: }, false { +// CIR: cir.yield %[[B]] : !cir.ptr<!s32i> +// CIR: }) : (!cir.bool) -> !cir.ptr<!s32i> +// CIR: %[[OLD_VAL:.*]] = cir.load{{.*}} %[[LVAL_PTR]] +// CIR: %[[NEW_VAL:.*]] = cir.binop(add, %[[OLD_VAL]], %{{.*}}) +// CIR: cir.store{{.*}} %[[NEW_VAL]], %[[LVAL_PTR]] + +// LLVM-LABEL: define{{.*}} void @_Z25test_cond_lvalue_compoundb( +// LLVM: %[[FLAG:.*]] = alloca i8 +// LLVM: %[[A:.*]] = alloca i32 +// LLVM: %[[B:.*]] = alloca i32 +// LLVM: %[[FLAG_VAL:.*]] = load i8, ptr %[[FLAG]] +// LLVM: %[[BOOL:.*]] = trunc i8 %[[FLAG_VAL]] to i1 +// LLVM: br i1 %[[BOOL]], label %[[TRUE_BB:.*]], label %[[FALSE_BB:.*]] +// LLVM: [[TRUE_BB]]: +// LLVM: br label %[[MERGE_BB:.*]] +// LLVM: [[FALSE_BB]]: +// LLVM: br label %[[MERGE_BB]] +// LLVM: [[MERGE_BB]]: +// LLVM: %[[PTR:.*]] = phi ptr [ %[[B]], %[[FALSE_BB]] ], [ %[[A]], %[[TRUE_BB]] ] +// LLVM: br label %[[CONT:.*]] +// LLVM: [[CONT]]: +// LLVM: %[[OLD:.*]] = load i32, ptr %[[PTR]] +// LLVM: %[[NEW:.*]] = add{{.*}} i32 %[[OLD]], 3 +// LLVM: store i32 %[[NEW]], ptr %[[PTR]] + +// OGCG-LABEL: define{{.*}} void @_Z25test_cond_lvalue_compoundb( +// OGCG: %[[FLAG:.*]] = alloca i8 +// OGCG: %[[A:.*]] = alloca i32 +// OGCG: %[[B:.*]] = alloca i32 +// OGCG: %[[FLAG_VAL:.*]] = load i8, ptr %[[FLAG]] +// OGCG: %[[BOOL:.*]] = trunc i8 %[[FLAG_VAL]] to i1 +// OGCG: br i1 %[[BOOL]], label %[[TRUE_BB:.*]], label %[[FALSE_BB:.*]] +// OGCG: [[TRUE_BB]]: +// OGCG: br label %[[MERGE_BB:.*]] +// OGCG: [[FALSE_BB]]: +// OGCG: br label %[[MERGE_BB]] +// OGCG: [[MERGE_BB]]: +// OGCG: %[[PTR:.*]] = phi ptr [ %[[A]], %[[TRUE_BB]] ], [ %[[B]], %[[FALSE_BB]] ] +// OGCG: %[[OLD:.*]] = load i32, ptr %[[PTR]] +// OGCG: %[[NEW:.*]] = add{{.*}} i32 %[[OLD]], 3 +// OGCG: store i32 %[[NEW]], ptr %[[PTR]] + +// Test constant folding - compile-time true condition with lvalue assignment +void test_cond_const_true_lvalue() { + int a = 1; + int b = 2; + (true ? a : b) = 99; +} + +// CIR-LABEL: cir.func{{.*}} @_Z27test_cond_const_true_lvaluev( +// CIR: %[[A:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] +// CIR: %[[B:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] +// CIR-NOT: cir.ternary +// CIR: %[[NINETYNINE:.*]] = cir.const #cir.int<99> : !s32i +// CIR: cir.store{{.*}} %[[NINETYNINE]], %[[A]] : !s32i, !cir.ptr<!s32i> + +// LLVM-LABEL: define{{.*}} void @_Z27test_cond_const_true_lvaluev( +// LLVM: %[[A:.*]] = alloca i32 +// LLVM: %[[B:.*]] = alloca i32 +// LLVM-NOT: br i1 +// LLVM: store i32 99, ptr %[[A]] + +// OGCG-LABEL: define{{.*}} void @_Z27test_cond_const_true_lvaluev( +// OGCG: %[[A:.*]] = alloca i32 +// OGCG: %[[B:.*]] = alloca i32 +// OGCG-NOT: br i1 +// OGCG: store i32 99, ptr %[[A]] + +// Test constant folding - compile-time false condition with lvalue assignment +void test_cond_const_false_lvalue() { + int a = 1; + int b = 2; + (false ? a : b) = 88; +} + +// CIR-LABEL: cir.func{{.*}} @_Z28test_cond_const_false_lvaluev( +// CIR: %[[A:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] +// CIR: %[[B:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] +// CIR-NOT: cir.ternary +// CIR: %[[EIGHTYEIGHT:.*]] = cir.const #cir.int<88> : !s32i +// CIR: cir.store{{.*}} %[[EIGHTYEIGHT]], %[[B]] : !s32i, !cir.ptr<!s32i> + +// LLVM-LABEL: define{{.*}} void @_Z28test_cond_const_false_lvaluev( +// LLVM: %[[A:.*]] = alloca i32 +// LLVM: %[[B:.*]] = alloca i32 +// LLVM-NOT: br i1 +// LLVM: store i32 88, ptr %[[B]] + +// OGCG-LABEL: define{{.*}} void @_Z28test_cond_const_false_lvaluev( +// OGCG: %[[A:.*]] = alloca i32 +// OGCG: %[[B:.*]] = alloca i32 +// OGCG-NOT: br i1 +// OGCG: store i32 88, ptr %[[B]] diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp index ee4fffe..c1c2e4b 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp @@ -161,7 +161,78 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -188,7 +259,78 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -724,7 +866,100 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -826,7 +1061,100 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -1510,6 +1838,106 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -1558,6 +1986,106 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp index 472e4ac..853f345 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp @@ -46,7 +46,17 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !cir.float, !cir.ptr<!cir.float> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -59,7 +69,17 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !cir.float, !cir.ptr<!cir.float> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -227,7 +247,38 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -258,7 +309,38 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -484,7 +566,42 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -520,7 +637,42 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp index 112ff656..67e8460 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp @@ -17,7 +17,7 @@ struct HasOperatorsInline { bool &operator&&(HasOperatorsInline& other); bool &operator||(HasOperatorsInline& other); // For min/max - HasOperatorsInline &operator<(HasOperatorsInline& other); + bool operator<(HasOperatorsInline& other); HasOperatorsInline &operator=(HasOperatorsInline& other); }; @@ -110,7 +110,13 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -119,7 +125,7 @@ void acc_combined() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(min:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <min> init { +// CHECK: acc.reduction.recipe @reduction_min__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i> @@ -141,7 +147,13 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -631,7 +643,34 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}): @@ -753,7 +792,34 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}): @@ -1528,6 +1594,38 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1605,6 +1703,38 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp index 7eaa822b..d74de82 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp @@ -47,7 +47,17 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !s32i, !cir.ptr<!s32i> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -60,7 +70,17 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !s32i, !cir.ptr<!s32i> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -280,7 +300,38 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -311,7 +362,38 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -700,7 +782,42 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -736,7 +853,42 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp index c2c0c77..a6df6c0 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp @@ -7,7 +7,7 @@ struct HasOperatorsOutline { bool b; ~HasOperatorsOutline(); -HasOperatorsOutline &operator=(const HasOperatorsOutline &); + HasOperatorsOutline &operator=(const HasOperatorsOutline &); }; HasOperatorsOutline &operator+=(HasOperatorsOutline &, HasOperatorsOutline &); @@ -18,7 +18,7 @@ HasOperatorsOutline &operator^=(HasOperatorsOutline &, HasOperatorsOutline &); bool &operator&&(HasOperatorsOutline &, HasOperatorsOutline &); bool &operator||(HasOperatorsOutline &, HasOperatorsOutline &); // For min/max -HasOperatorsOutline &operator<(HasOperatorsOutline &, HasOperatorsOutline &); +bool operator<(HasOperatorsOutline &, HasOperatorsOutline &); template<typename T> void acc_combined() { @@ -109,7 +109,13 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): @@ -118,7 +124,7 @@ void acc_combined() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(min:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <min> init { +// CHECK: acc.reduction.recipe @reduction_min__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i> @@ -140,7 +146,13 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): @@ -630,7 +642,34 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -752,7 +791,34 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -1527,6 +1593,38 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1604,6 +1702,38 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c index b439623..d65d5d4 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c @@ -160,7 +160,81 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast int_to_bool %[[TERNARY]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[RES_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } ; @@ -187,7 +261,81 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast int_to_bool %[[TERNARY]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[RES_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } ; @@ -710,7 +858,103 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast int_to_bool %[[TERNARY]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[RES_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; @@ -812,7 +1056,104 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast int_to_bool %[[TERNARY]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[RES_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; @@ -1467,6 +1808,110 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast int_to_bool %[[TERNARY]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[RES_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// +// CHECK-NEXT: cir.yield +// +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; @@ -1515,6 +1960,110 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast int_to_bool %[[TERNARY]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[RES_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// +// CHECK-NEXT: cir.yield +// +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp index f997902..f32fa2d 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp @@ -161,7 +161,78 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } ; @@ -188,7 +259,78 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } ; @@ -724,7 +866,100 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; @@ -826,7 +1061,100 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; @@ -1510,6 +1838,106 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; @@ -1558,6 +1986,106 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c index 3e4583f..9f73367 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c @@ -45,7 +45,18 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !cir.float, !cir.ptr<!cir.float> +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } ; @@ -58,7 +69,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !cir.float, !cir.ptr<!cir.float> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } ; @@ -226,7 +247,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; @@ -257,7 +309,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; @@ -483,7 +566,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; @@ -519,7 +637,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp index 833cfad..ffd2631 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp @@ -47,7 +47,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !cir.float, !cir.ptr<!cir.float> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } ; @@ -60,7 +70,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !cir.float, !cir.ptr<!cir.float> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } ; @@ -228,7 +248,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; @@ -259,7 +310,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; @@ -485,7 +567,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; @@ -521,6 +638,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp index ec4372d..1e367ee 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp @@ -17,7 +17,7 @@ struct HasOperatorsInline { bool &operator&&(HasOperatorsInline& other); bool &operator||(HasOperatorsInline& other); // For min/max - HasOperatorsInline &operator<(HasOperatorsInline& other); + bool operator<(HasOperatorsInline& other); HasOperatorsInline &operator=(HasOperatorsInline& other); }; @@ -110,7 +110,13 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -119,7 +125,7 @@ void acc_compute() { // CHECK-NEXT: } ; #pragma acc parallel reduction(min:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <min> init { +// CHECK: acc.reduction.recipe @reduction_min__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i> @@ -141,7 +147,13 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -631,7 +643,34 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}): @@ -753,7 +792,34 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}): @@ -1528,6 +1594,38 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1605,6 +1703,38 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c index 0cee5c6..2f42a5c 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c @@ -46,7 +46,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !s32i, !cir.ptr<!s32i> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } ; @@ -59,7 +69,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !s32i, !cir.ptr<!s32i> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } ; @@ -279,7 +299,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -310,7 +361,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -699,7 +781,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -735,7 +852,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp index 822dd9f6..af7bcf3 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp @@ -47,7 +47,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !s32i, !cir.ptr<!s32i> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } ; @@ -60,7 +70,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !s32i, !cir.ptr<!s32i> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } ; @@ -280,7 +300,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -311,7 +362,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -700,7 +782,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -736,7 +853,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp index 873bf51..ec890e2 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp @@ -18,7 +18,7 @@ HasOperatorsOutline &operator^=(HasOperatorsOutline &, HasOperatorsOutline &); bool &operator&&(HasOperatorsOutline &, HasOperatorsOutline &); bool &operator||(HasOperatorsOutline &, HasOperatorsOutline &); // For min/max -HasOperatorsOutline &operator<(HasOperatorsOutline &, HasOperatorsOutline &); +bool operator<(HasOperatorsOutline &, HasOperatorsOutline &); template<typename T> void acc_compute() { @@ -109,7 +109,13 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): @@ -118,7 +124,7 @@ void acc_compute() { // CHECK-NEXT: } ; #pragma acc parallel reduction(min:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <min> init { +// CHECK: acc.reduction.recipe @reduction_min__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i> @@ -140,7 +146,13 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): @@ -630,7 +642,34 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -752,7 +791,34 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -1527,6 +1593,38 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1604,6 +1702,38 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c index b2d1362..08daa70 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c @@ -46,7 +46,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!u32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!u32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!u32i> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !u32i, !cir.ptr<!u32i> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!u32i> // CHECK-NEXT: } ; @@ -59,7 +69,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!u32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!u32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!u32i> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !u32i, !cir.ptr<!u32i> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!u32i> // CHECK-NEXT: } ; @@ -278,7 +298,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> // CHECK-NEXT: } ; @@ -309,7 +360,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> // CHECK-NEXT: } ; @@ -698,7 +780,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> // CHECK-NEXT: } ; @@ -734,7 +851,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp index 349e0fb..1a77c0f 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp @@ -161,7 +161,78 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -188,7 +259,78 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -724,7 +866,100 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -826,7 +1061,100 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -1510,6 +1838,106 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -1558,6 +1986,106 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp index 8d9269b..7faef71 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp @@ -47,7 +47,17 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !cir.float, !cir.ptr<!cir.float> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -60,7 +70,17 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !cir.float, !cir.ptr<!cir.float> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -228,7 +248,38 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -259,7 +310,38 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -485,7 +567,42 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -521,7 +638,42 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp index 1c89515..43c9fbbc 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp @@ -17,7 +17,7 @@ struct HasOperatorsInline { bool &operator&&(HasOperatorsInline& other); bool &operator||(HasOperatorsInline& other); // For min/max - HasOperatorsInline &operator<(HasOperatorsInline& other); + bool operator<(HasOperatorsInline& other); HasOperatorsInline &operator=(HasOperatorsInline& other); }; @@ -110,7 +110,13 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -119,7 +125,7 @@ void acc_loop() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc loop reduction(min:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <min> init { +// CHECK: acc.reduction.recipe @reduction_min__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i> @@ -141,7 +147,13 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -631,7 +643,34 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}): @@ -753,7 +792,34 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}): @@ -1528,6 +1594,38 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1605,6 +1703,38 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp index 72e9d1f..5353218 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp @@ -47,7 +47,17 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !s32i, !cir.ptr<!s32i> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -60,7 +70,17 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !s32i, !cir.ptr<!s32i> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -280,7 +300,38 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -311,7 +362,38 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -700,7 +782,42 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -736,7 +853,42 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp index a36d41c1..e193cfa 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp @@ -18,7 +18,7 @@ HasOperatorsOutline &operator^=(HasOperatorsOutline &, HasOperatorsOutline &); bool &operator&&(HasOperatorsOutline &, HasOperatorsOutline &); bool &operator||(HasOperatorsOutline &, HasOperatorsOutline &); // For min/max -HasOperatorsOutline &operator<(HasOperatorsOutline &, HasOperatorsOutline &); +bool operator<(HasOperatorsOutline &, HasOperatorsOutline &); template<typename T> void acc_loop() { @@ -109,7 +109,13 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): @@ -118,7 +124,7 @@ void acc_loop() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc loop reduction(min:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <min> init { +// CHECK: acc.reduction.recipe @reduction_min__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i> @@ -140,7 +146,13 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): @@ -630,7 +642,34 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -752,7 +791,34 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -1527,6 +1593,38 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1604,6 +1702,38 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index bcffd861..26aed772 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1070,19 +1070,25 @@ __m128d test_mm256_extractf128_pd(__m256d A) { // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3> return _mm256_extractf128_pd(A, 1); } +TEST_CONSTEXPR(match_m128d(_mm256_extractf128_pd(((__m256d){0.0, 1.0, 2.0, 3.0}), 1), + 2.0, 3.0)); __m128 test_mm256_extractf128_ps(__m256 A) { // CHECK-LABEL: test_mm256_extractf128_ps // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> return _mm256_extractf128_ps(A, 1); } +TEST_CONSTEXPR(match_m128(_mm256_extractf128_ps(((__m256){0,1,2,3,4,5,6,7}), 1), + 4.0f, 5.0f, 6.0f, 7.0f)); __m128i test_mm256_extractf128_si256(__m256i A) { // CHECK-LABEL: test_mm256_extractf128_si256 // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> return _mm256_extractf128_si256(A, 1); } - +TEST_CONSTEXPR(match_m128i(_mm256_extractf128_si256(((__m256i){0ULL, 1ULL, 2ULL, 3ULL}), 1), + 2ULL, 3ULL)); + __m256d test_mm256_floor_pd(__m256d x) { // CHECK-LABEL: test_mm256_floor_pd // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %{{.*}}, i32 1) @@ -1348,12 +1354,16 @@ int test_mm256_movemask_pd(__m256d A) { // CHECK: call {{.*}}i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %{{.*}}) return _mm256_movemask_pd(A); } +TEST_CONSTEXPR(_mm256_movemask_pd((__m256d)(__v4df){-1234.5678901234, 98765.4321098765, 0.000123456789, -3.14159265358979}) == 0x9); +TEST_CONSTEXPR(_mm256_movemask_pd((__m256d)(__v4df){-0.000000987654321, -99999.999999999, 42.424242424242, 314159.2653589793}) == 0x3); int test_mm256_movemask_ps(__m256 A) { // CHECK-LABEL: test_mm256_movemask_ps // CHECK: call {{.*}}i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %{{.*}}) return _mm256_movemask_ps(A); } +TEST_CONSTEXPR(_mm256_movemask_ps((__m256)(__v8sf){-12.3456f, 34.7890f, -0.0001234f, 123456.78f, -987.654f, 0.001234f, 3.14159f, -256.001f}) == 0x95); +TEST_CONSTEXPR(_mm256_movemask_ps((__m256)(__v8sf){0.333333f, -45.6789f, 999.999f, -0.9999f, 17.234f, -128.512f, 2048.0f, -3.14f}) == 0xAA); __m256d test_mm256_mul_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_mul_pd diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index a505d70..03b1bde 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -466,6 +466,7 @@ __m128i test0_mm256_extracti128_si256_0(__m256i a) { // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <2 x i32> <i32 0, i32 1> return _mm256_extracti128_si256(a, 0); } +TEST_CONSTEXPR(match_m128i(_mm256_extracti128_si256(((__m256i){1ULL, 2ULL, 3ULL, 4ULL}), 0),1ULL, 2ULL)); __m128i test1_mm256_extracti128_si256_1(__m256i a) { // CHECK-LABEL: test1_mm256_extracti128_si256 @@ -992,6 +993,9 @@ int test_mm256_movemask_epi8(__m256i a) { // CHECK: call {{.*}}i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %{{.*}}) return _mm256_movemask_epi8(a); } +TEST_CONSTEXPR(_mm256_movemask_epi8((__m256i)(__v32qu){0x7F,0x80,0x01,0xFF,0x00,0xAA,0x55,0xC3,0x12,0x8E,0x00,0xFE,0x7E,0x81,0xFF,0x01,0xB6,0x00,0x39,0x40,0xD0,0x05,0x80,0x2A,0x7B,0x00,0x90,0xFF,0x01,0x34,0xC0,0x6D}) == 0x4C516AAA); +TEST_CONSTEXPR(_mm256_movemask_epi8((__m256i)(__v8si){(int)0x80FF00AA,(int)0x7F0183E1,(int)0xDEADBEEF,(int)0xC0000001,(int)0x00000000,(int)0xFFFFFFFF,(int)0x12345678,(int)0x90ABCDEF}) == 0xF0F08F3D); +TEST_CONSTEXPR(_mm256_movemask_epi8((__m256i)(__v4du){0xFF00000000000080ULL,0x7F010203040506C3ULL,0x8000000000000000ULL,0x0123456789ABCDEFULL}) == 0x0F800181); __m256i test_mm256_mpsadbw_epu8(__m256i x, __m256i y) { // CHECK-LABEL: test_mm256_mpsadbw_epu8 diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index 1b09959..9c4ada3 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -1415,6 +1415,7 @@ __m256 test_mm512_extractf32x8_ps(__m512 __A) { // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> return _mm512_extractf32x8_ps(__A, 1); } +TEST_CONSTEXPR(match_m256(_mm512_extractf32x8_ps(((__m512){0.0f,1.0f,2.0f,3.0f, 4.0f,5.0f,6.0f,7.0f,8.0f,9.0f,10.0f,11.0f, 12.0f,13.0f,14.0f,15.0f}), 1),8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f)); __m256 test_mm512_mask_extractf32x8_ps(__m256 __W, __mmask8 __U, __m512 __A) { // CHECK-LABEL: test_mm512_mask_extractf32x8_ps @@ -1422,6 +1423,7 @@ __m256 test_mm512_mask_extractf32x8_ps(__m256 __W, __mmask8 __U, __m512 __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm512_mask_extractf32x8_ps(__W, __U, __A, 1); } +TEST_CONSTEXPR(match_m256(_mm512_mask_extractf32x8_ps(((__m256)(__v8sf){0,0,0,0,0,0,0,0}), (__mmask8)0xFF,((__m512)(__v16sf){0.0f,1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f,9.0f,10.0f,11.0f,12.0f,13.0f,14.0f,15.0f}),1),8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f)); __m256 test_mm512_maskz_extractf32x8_ps(__mmask8 __U, __m512 __A) { // CHECK-LABEL: test_mm512_maskz_extractf32x8_ps @@ -1429,12 +1431,14 @@ __m256 test_mm512_maskz_extractf32x8_ps(__mmask8 __U, __m512 __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm512_maskz_extractf32x8_ps(__U, __A, 1); } +TEST_CONSTEXPR(match_m256(_mm512_maskz_extractf32x8_ps((__mmask8)0x0F, ((__m512)(__v16sf){0.0f,1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f,9.0f,10.0f,11.0f,12.0f,13.0f,14.0f,15.0f}),1),8.0f, 9.0f, 10.0f, 11.0f, 0.0f, 0.0f, 0.0f, 0.0f)); __m128d test_mm512_extractf64x2_pd(__m512d __A) { // CHECK-LABEL: test_mm512_extractf64x2_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <2 x i32> <i32 6, i32 7> return _mm512_extractf64x2_pd(__A, 3); } +TEST_CONSTEXPR(match_m128d(_mm512_extractf64x2_pd(((__m512d){0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0}), 3),6.0, 7.0)); __m128d test_mm512_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m512d __A) { // CHECK-LABEL: test_mm512_mask_extractf64x2_pd @@ -1442,6 +1446,7 @@ __m128d test_mm512_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m512d __A) // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm512_mask_extractf64x2_pd(__W, __U, __A, 3); } +TEST_CONSTEXPR(match_m128d(_mm512_mask_extractf64x2_pd(((__m128d)(__v2df){100.0, 101.0}),(__mmask8)0x1,((__m512d)(__v8df){0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0}),3),6.0, 101.0)); __m128d test_mm512_maskz_extractf64x2_pd(__mmask8 __U, __m512d __A) { // CHECK-LABEL: test_mm512_maskz_extractf64x2_pd @@ -1449,12 +1454,14 @@ __m128d test_mm512_maskz_extractf64x2_pd(__mmask8 __U, __m512d __A) { // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm512_maskz_extractf64x2_pd(__U, __A, 3); } +TEST_CONSTEXPR(match_m128d(_mm512_maskz_extractf64x2_pd((__mmask8)0x2,((__m512d){0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0}),3),0.0, 7.0)); __m256i test_mm512_extracti32x8_epi32(__m512i __A) { // CHECK-LABEL: test_mm512_extracti32x8_epi32 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> return _mm512_extracti32x8_epi32(__A, 1); } +TEST_CONSTEXPR(match_v8si(_mm512_extracti32x8_epi32(((__m512i)(__v16si){0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}), 1),8, 9,10,11,12,13,14,15)); __m256i test_mm512_mask_extracti32x8_epi32(__m256i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_extracti32x8_epi32 @@ -1462,6 +1469,7 @@ __m256i test_mm512_mask_extracti32x8_epi32(__m256i __W, __mmask8 __U, __m512i __ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm512_mask_extracti32x8_epi32(__W, __U, __A, 1); } +TEST_CONSTEXPR(match_v8si(_mm512_mask_extracti32x8_epi32(((__m256i)(__v8si){100,101,102,103,104,105,106,107}), (__mmask8)0xAA,((__m512i)(__v16si){0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}),1),100, 9, 102, 11, 104, 13, 106, 15)); __m256i test_mm512_maskz_extracti32x8_epi32(__mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_extracti32x8_epi32 @@ -1469,12 +1477,14 @@ __m256i test_mm512_maskz_extracti32x8_epi32(__mmask8 __U, __m512i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm512_maskz_extracti32x8_epi32(__U, __A, 1); } +TEST_CONSTEXPR(match_v8si(_mm512_maskz_extracti32x8_epi32((__mmask8)0x0F,((__m512i)(__v16si){0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}),1),8, 9, 10, 11, 0, 0, 0, 0)); __m128i test_mm512_extracti64x2_epi64(__m512i __A) { // CHECK-LABEL: test_mm512_extracti64x2_epi64 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <2 x i32> <i32 6, i32 7> return _mm512_extracti64x2_epi64(__A, 3); } +TEST_CONSTEXPR(match_m128i(_mm512_extracti64x2_epi64(((__m512i)(__v8di){0ULL,1ULL,2ULL,3ULL, 4ULL,5ULL,6ULL,7ULL}), 3),6ULL, 7ULL)); __m128i test_mm512_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_extracti64x2_epi64 @@ -1482,6 +1492,7 @@ __m128i test_mm512_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m512i __ // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm512_mask_extracti64x2_epi64(__W, __U, __A, 3); } +TEST_CONSTEXPR(match_m128i(_mm512_mask_extracti64x2_epi64(((__m128i)(__v2di){100ULL, 101ULL}), (__mmask8)0x1,((__m512i)(__v8di){0ULL,1ULL,2ULL,3ULL, 4ULL,5ULL,6ULL,7ULL}),3),6ULL, 101ULL)); __m128i test_mm512_maskz_extracti64x2_epi64(__mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_extracti64x2_epi64 @@ -1489,6 +1500,7 @@ __m128i test_mm512_maskz_extracti64x2_epi64(__mmask8 __U, __m512i __A) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm512_maskz_extracti64x2_epi64(__U, __A, 3); } +TEST_CONSTEXPR(match_m128i(_mm512_maskz_extracti64x2_epi64((__mmask8)0x2,((__m512i)(__v8di){0ULL,1ULL,2ULL,3ULL, 4ULL,5ULL,6ULL,7ULL}),3),0ULL, 7ULL)); __m512 test_mm512_insertf32x8(__m512 __A, __m256 __B) { // CHECK-LABEL: test_mm512_insertf32x8 diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 6073fdf..122dadd 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -2452,6 +2452,7 @@ __m256d test_mm512_extractf64x4_pd(__m512d a) // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> return _mm512_extractf64x4_pd(a, 1); } +TEST_CONSTEXPR(match_m256d(_mm512_extractf64x4_pd(((__m512d){0.0,1.0,2.0,3.0, 4.0,5.0,6.0,7.0}),1),4.0, 5.0, 6.0, 7.0)); __m256d test_mm512_mask_extractf64x4_pd(__m256d __W,__mmask8 __U,__m512d __A){ // CHECK-LABEL: test_mm512_mask_extractf64x4_pd @@ -2459,6 +2460,7 @@ __m256d test_mm512_mask_extractf64x4_pd(__m256d __W,__mmask8 __U,__m512d __A){ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm512_mask_extractf64x4_pd( __W, __U, __A, 1); } +TEST_CONSTEXPR(match_m256d(_mm512_mask_extractf64x4_pd(((__m256d){100.0,101.0,102.0,103.0}), (__mmask8)0x5,((__m512d){0.0,1.0,2.0,3.0, 4.0,5.0,6.0,7.0}), 1), 4.0, 101.0, 6.0, 103.0)); __m256d test_mm512_maskz_extractf64x4_pd(__mmask8 __U,__m512d __A){ // CHECK-LABEL: test_mm512_maskz_extractf64x4_pd @@ -2466,6 +2468,7 @@ __m256d test_mm512_maskz_extractf64x4_pd(__mmask8 __U,__m512d __A){ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm512_maskz_extractf64x4_pd( __U, __A, 1); } +TEST_CONSTEXPR(match_m256d(_mm512_maskz_extractf64x4_pd((__mmask8)0x3,((__m512d){0.0,1.0,2.0,3.0, 4.0,5.0,6.0,7.0}),1),4.0, 5.0, 0.0, 0.0)); __m128 test_mm512_extractf32x4_ps(__m512 a) { @@ -2473,6 +2476,7 @@ __m128 test_mm512_extractf32x4_ps(__m512 a) // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> return _mm512_extractf32x4_ps(a, 1); } +TEST_CONSTEXPR(match_m128(_mm512_extractf32x4_ps(((__m512){0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}),1),4.0f, 5.0f, 6.0f, 7.0f)); __m128 test_mm512_mask_extractf32x4_ps(__m128 __W, __mmask8 __U,__m512 __A){ // CHECK-LABEL: test_mm512_mask_extractf32x4_ps @@ -2480,6 +2484,7 @@ __m128 test_mm512_mask_extractf32x4_ps(__m128 __W, __mmask8 __U,__m512 __A){ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm512_mask_extractf32x4_ps( __W, __U, __A, 1); } +TEST_CONSTEXPR(match_m128(_mm512_mask_extractf32x4_ps(((__m128){100,101,102,103}),(__mmask8)0x5,((__m512){0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}),1),4.0f, 101.0f, 6.0f, 103.0f)); __m128 test_mm512_maskz_extractf32x4_ps( __mmask8 __U,__m512 __A){ // CHECK-LABEL: test_mm512_maskz_extractf32x4_ps @@ -2487,6 +2492,7 @@ __m128 test_mm512_maskz_extractf32x4_ps( __mmask8 __U,__m512 __A){ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm512_maskz_extractf32x4_ps(__U, __A, 1); } +TEST_CONSTEXPR(match_m128(_mm512_maskz_extractf32x4_ps((__mmask8)0x3,((__m512){0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}),1),4.0f, 5.0f, 0.0f, 0.0f)); __mmask16 test_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) { // CHECK-LABEL: test_mm512_cmpeq_epu32_mask @@ -7363,6 +7369,7 @@ __m128i test_mm512_extracti32x4_epi32(__m512i __A) { // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15> return _mm512_extracti32x4_epi32(__A, 3); } +TEST_CONSTEXPR(match_m128i(_mm512_extracti32x4_epi32(((__m512i)(__v16si){0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}), 3), 0x0000000D0000000CULL, 0x0000000F0000000EULL)); __m128i test_mm512_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_extracti32x4_epi32 @@ -7370,6 +7377,7 @@ __m128i test_mm512_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m512i __ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm512_mask_extracti32x4_epi32(__W, __U, __A, 3); } +TEST_CONSTEXPR(match_m128i(_mm512_mask_extracti32x4_epi32(((__m128i)(__v4si){100,101,102,103}), (__mmask8)0x5, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 3), 0x000000650000000CULL, 0x000000670000000EULL)); __m128i test_mm512_maskz_extracti32x4_epi32(__mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_extracti32x4_epi32 @@ -7377,12 +7385,14 @@ __m128i test_mm512_maskz_extracti32x4_epi32(__mmask8 __U, __m512i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm512_maskz_extracti32x4_epi32(__U, __A, 3); } +TEST_CONSTEXPR(match_m128i(_mm512_maskz_extracti32x4_epi32((__mmask8)0x3, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 3), 0x0000000D0000000CULL, 0x0000000000000000ULL)); __m256i test_mm512_extracti64x4_epi64(__m512i __A) { // CHECK-LABEL: test_mm512_extracti64x4_epi64 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> return _mm512_extracti64x4_epi64(__A, 1); } +TEST_CONSTEXPR(match_m256i(_mm512_extracti64x4_epi64(((__m512i)(__v8di){0,1,2,3,4,5,6,7}), 1), 4ULL, 5ULL, 6ULL, 7ULL)); __m256i test_mm512_mask_extracti64x4_epi64(__m256i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_extracti64x4_epi64 @@ -7390,6 +7400,7 @@ __m256i test_mm512_mask_extracti64x4_epi64(__m256i __W, __mmask8 __U, __m512i __ // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm512_mask_extracti64x4_epi64(__W, __U, __A, 1); } +TEST_CONSTEXPR(match_m256i(_mm512_mask_extracti64x4_epi64(((__m256i)(__v4di){100ULL,101ULL,102ULL,103ULL}), (__mmask8)0x5, (((__m512i)(__v8di){0ULL,1ULL,2ULL,3ULL, 4ULL,5ULL,6ULL,7ULL})), 1), 4ULL, 101ULL, 6ULL, 103ULL)); __m256i test_mm512_maskz_extracti64x4_epi64(__mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_extracti64x4_epi64 @@ -7397,7 +7408,7 @@ __m256i test_mm512_maskz_extracti64x4_epi64(__mmask8 __U, __m512i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm512_maskz_extracti64x4_epi64(__U, __A, 1); } - +TEST_CONSTEXPR(match_m256i(_mm512_maskz_extracti64x4_epi64((__mmask8)0x3, (((__m512i)(__v8di){0ULL,1ULL,2ULL,3ULL, 4ULL,5ULL,6ULL,7ULL})), 1), 4ULL, 5ULL, 0ULL, 0ULL)); __m512d test_mm512_insertf64x4(__m512d __A, __m256d __B) { // CHECK-LABEL: test_mm512_insertf64x4 // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 9819dbd..34db764 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -9911,6 +9911,7 @@ __m128 test_mm256_extractf32x4_ps(__m256 __A) { // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> return _mm256_extractf32x4_ps(__A, 1); } +TEST_CONSTEXPR(match_m128(_mm256_extractf32x4_ps(((__m256){0,1,2,3, 4,5,6,7}), 1),4.0f, 5.0f, 6.0f, 7.0f)); __m128 test_mm256_mask_extractf32x4_ps(__m128 __W, __mmask8 __U, __m256 __A) { // CHECK-LABEL: test_mm256_mask_extractf32x4_ps @@ -9918,6 +9919,7 @@ __m128 test_mm256_mask_extractf32x4_ps(__m128 __W, __mmask8 __U, __m256 __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm256_mask_extractf32x4_ps(__W, __U, __A, 1); } +TEST_CONSTEXPR(match_m128(_mm256_mask_extractf32x4_ps((((__m128){100,101,102,103})), (__mmask8)0x5, (((__m256){0,1,2,3, 4,5,6,7})), 1), 4.0f, 101.0f, 6.0f, 103.0f)); __m128 test_mm256_maskz_extractf32x4_ps(__mmask8 __U, __m256 __A) { // CHECK-LABEL: test_mm256_maskz_extractf32x4_ps @@ -9925,12 +9927,14 @@ __m128 test_mm256_maskz_extractf32x4_ps(__mmask8 __U, __m256 __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm256_maskz_extractf32x4_ps(__U, __A, 1); } +TEST_CONSTEXPR(match_m128(_mm256_maskz_extractf32x4_ps((__mmask8)0x3, (((__m256){0,1,2,3, 4,5,6,7})), 1), 4.0f, 5.0f, 0.0f, 0.0f)); __m128i test_mm256_extracti32x4_epi32(__m256i __A) { // CHECK-LABEL: test_mm256_extracti32x4_epi32 // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> return _mm256_extracti32x4_epi32(__A, 1); } +TEST_CONSTEXPR(match_m128i(_mm256_extracti32x4_epi32((((__m256i)(__v8si){0,1,2,3, 4,5,6,7})), 1), 0x0000000500000004ULL, 0x0000000700000006ULL)); __m128i test_mm256_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_extracti32x4_epi32 @@ -9938,6 +9942,7 @@ __m128i test_mm256_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m256i __ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm256_mask_extracti32x4_epi32(__W, __U, __A, 1); } +TEST_CONSTEXPR(match_m128i(_mm256_mask_extracti32x4_epi32((((__m128i)(__v4si){100,101,102,103})), (__mmask8)0xA, (((__m256i)(__v8si){0,1,2,3, 4,5,6,7})), 1),0x0000000500000064ULL, 0x0000000700000066ULL)); __m128i test_mm256_maskz_extracti32x4_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_extracti32x4_epi32 @@ -9945,6 +9950,7 @@ __m128i test_mm256_maskz_extracti32x4_epi32(__mmask8 __U, __m256i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm256_maskz_extracti32x4_epi32(__U, __A, 1); } +TEST_CONSTEXPR(match_m128i(_mm256_maskz_extracti32x4_epi32((__mmask8)0x3, (((__m256i)(__v8si){0,1,2,3, 4,5,6,7})), 1), 0x0000000500000004ULL, 0x0000000000000000ULL)); __m256 test_mm256_insertf32x4(__m256 __A, __m128 __B) { // CHECK-LABEL: test_mm256_insertf32x4 diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c index 4773b60..1bfc022 100644 --- a/clang/test/CodeGen/X86/avx512vldq-builtins.c +++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c @@ -1101,6 +1101,7 @@ __m128d test_mm256_extractf64x2_pd(__m256d __A) { // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3> return _mm256_extractf64x2_pd(__A, 1); } +TEST_CONSTEXPR(match_m128d(_mm256_extractf64x2_pd(((__m256d){0.0,1.0,2.0,3.0}), 1), 2.0, 3.0)); __m128d test_mm256_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m256d __A) { // CHECK-LABEL: test_mm256_mask_extractf64x2_pd @@ -1108,6 +1109,7 @@ __m128d test_mm256_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m256d __A) // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm256_mask_extractf64x2_pd(__W, __U, __A, 1); } +TEST_CONSTEXPR(match_m128d(_mm256_mask_extractf64x2_pd((((__m128d){100.0, 101.0})), (__mmask8)0x1, (((__m256d){0.0,1.0,2.0,3.0})),1), 2.0, 101.0)); __m128d test_mm256_maskz_extractf64x2_pd(__mmask8 __U, __m256d __A) { // CHECK-LABEL: test_mm256_maskz_extractf64x2_pd @@ -1115,12 +1117,14 @@ __m128d test_mm256_maskz_extractf64x2_pd(__mmask8 __U, __m256d __A) { // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm256_maskz_extractf64x2_pd(__U, __A, 1); } +TEST_CONSTEXPR(match_m128d(_mm256_maskz_extractf64x2_pd((__mmask8)0x2,(((__m256d){0.0,1.0,2.0,3.0})),1), 0.0, 3.0)); __m128i test_mm256_extracti64x2_epi64(__m256i __A) { // CHECK-LABEL: test_mm256_extracti64x2_epi64 // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <2 x i32> <i32 2, i32 3> return _mm256_extracti64x2_epi64(__A, 1); } +TEST_CONSTEXPR(match_m128i(_mm256_extracti64x2_epi64(((__m256i){0ULL,1ULL,2ULL,3ULL}), 1), 2ULL, 3ULL)); __m128i test_mm256_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_extracti64x2_epi64 @@ -1128,6 +1132,7 @@ __m128i test_mm256_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m256i __ // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm256_mask_extracti64x2_epi64(__W, __U, __A, 1); } +TEST_CONSTEXPR(match_m128i(_mm256_mask_extracti64x2_epi64((((__m128i){100ULL, 101ULL})), (__mmask8)0x1, (((__m256i){0ULL,1ULL,2ULL,3ULL})), 1), 2ULL, 101ULL)); __m128i test_mm256_maskz_extracti64x2_epi64(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_extracti64x2_epi64 @@ -1135,6 +1140,7 @@ __m128i test_mm256_maskz_extracti64x2_epi64(__mmask8 __U, __m256i __A) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm256_maskz_extracti64x2_epi64(__U, __A, 1); } +TEST_CONSTEXPR(match_m128i(_mm256_maskz_extracti64x2_epi64((__mmask8)0x2, (((__m256i){0ULL,1ULL,2ULL,3ULL})),1), 0ULL, 3ULL)); __m256d test_mm256_insertf64x2(__m256d __A, __m128d __B) { // CHECK-LABEL: test_mm256_insertf64x2 diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index c1ac57b..7674255 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -409,6 +409,10 @@ int test_mm_movemask_pi8(__m64 a) { // CHECK: call {{.*}}i32 @llvm.x86.sse2.pmovmskb.128( return _mm_movemask_pi8(a); } +TEST_CONSTEXPR(_mm_movemask_pi8((__m64)((__v8qu){0x7F,0x80,0x01,0xFF,0x00,0xAA,0x55,0xC3})) == 0xAA); +TEST_CONSTEXPR(_mm_movemask_pi8((__m64)((__v2si){(int)0x80FF00AA,(int)0x7F0183E1})) == 0x3D); +TEST_CONSTEXPR(_mm_movemask_pi8((__m64)((__v1di){(long long)0xE110837A00924DB0ULL})) == 0xA5); + __m64 test_mm_mul_su32(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_mul_su32 diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c index 3bad342..f5c1d00 100644 --- a/clang/test/CodeGen/X86/sse-builtins.c +++ b/clang/test/CodeGen/X86/sse-builtins.c @@ -561,6 +561,8 @@ int test_mm_movemask_ps(__m128 A) { // CHECK: call {{.*}}i32 @llvm.x86.sse.movmsk.ps(<4 x float> %{{.*}}) return _mm_movemask_ps(A); } +TEST_CONSTEXPR(_mm_movemask_ps((__m128)(__v4sf){-2.0f, 3.0f, -5.5f, -0.0f}) == 0xD); +TEST_CONSTEXPR(_mm_movemask_ps((__m128)(__v4sf){-7.348215e5, 0.00314159, -12.789, 2.7182818}) == 0x5); __m128 test_mm_mul_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_mul_ps diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index ade7ef3..8e4fb86 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -956,12 +956,17 @@ int test_mm_movemask_epi8(__m128i A) { // CHECK: call {{.*}}i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}}) return _mm_movemask_epi8(A); } +TEST_CONSTEXPR(_mm_movemask_epi8((__m128i)(__v16qu){0x7F,0x80,0x01,0xFF,0x00,0xAA,0x55,0xC3,0x12,0x8E,0x00,0xFE,0x7E,0x81,0xFF,0x01}) == 0x6AAA); +TEST_CONSTEXPR(_mm_movemask_epi8((__m128i)(__v4si){(int)0x80FF00AA,(int)0x7F0183E1,(int)0xDEADBEEF,(int)0xC0000001}) == 0x8F3D); +TEST_CONSTEXPR(_mm_movemask_epi8((__m128i)(__v2du){0xFF00000000000080ULL,0x7F010203040506C3ULL}) == 0x181); int test_mm_movemask_pd(__m128d A) { // CHECK-LABEL: test_mm_movemask_pd // CHECK: call {{.*}}i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}}) return _mm_movemask_pd(A); } +TEST_CONSTEXPR(_mm_movemask_pd((__m128d)(__v2df){-12345.67890123, 4567.89012345}) == 0x1); +TEST_CONSTEXPR(_mm_movemask_pd((__m128d)(__v2df){0.0000987654321, 09876.5432109876}) == 0x0); __m128i test_mm_mul_epu32(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_mul_epu32 diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index 89a7ac2..62cd392 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -376,6 +376,16 @@ __m128i test_mm_minpos_epu16(__m128i x) { // CHECK: call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %{{.*}}) return _mm_minpos_epu16(x); } +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){0,0,0,0, 0,0,0,0}), 0,0,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){1,0,0,0, 0,0,0,0}), 0,1,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){65535,65535,65535,65535,65535,65535,65535,65535}), 65535,0,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){9,8,7,6,5,4,3,2}), 2,7,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){5,5,5,5,5,5,5,5}), 5,0,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){5,7,9,4,10,4,11,12}), 4,3,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){6,0,0,0,0,0,0,0}), 0,1,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){1000,2000,3000,4000,5000,6000,7000,1}), 1,7,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){1234,5678,42,9999,65535,0,4242,42}), 0,5,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){400,500,12,600,12,700,800,900}), 12,2,0,0, 0,0,0,0)); __m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_mpsadbw_epu8 diff --git a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl new file mode 100644 index 0000000..be822a6 --- /dev/null +++ b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl @@ -0,0 +1,47 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals all --include-generated-funcs --prefix-filecheck-ir-name VAR --version 5 +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx1250 -disable-llvm-passes -fno-ident -emit-llvm %s -o - | FileCheck %s + +kernel void foo(global int *p) { *p = 1; } +// CHECK: Function Attrs: convergent norecurse nounwind +// CHECK-LABEL: define dso_local amdgpu_kernel void @foo( +// CHECK-SAME: ptr addrspace(1) noundef align 4 [[P:%.*]]) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[P]], ptr [[P_ADDR_ASCAST]], align 8, !tbaa [[TBAA7:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[P_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: call void @__clang_ocl_kern_imp_foo(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: alwaysinline convergent norecurse nounwind +// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_foo( +// CHECK-SAME: ptr addrspace(1) noundef align 4 [[P:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[P]], ptr [[P_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[P_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP0]], align 4, !tbaa [[TBAA12:![0-9]+]] +// CHECK-NEXT: ret void +// +//. +// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" "uniform-work-group-size"="false" } +// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" } +// CHECK: attributes #[[ATTR2]] = { convergent nounwind } +//. +// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} +// CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: [[META2:![0-9]+]] = !{i32 2, i32 0} +// CHECK: [[META3]] = !{i32 1} +// CHECK: [[META4]] = !{!"none"} +// CHECK: [[META5]] = !{!"int*"} +// CHECK: [[META6]] = !{!""} +// CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +// CHECK: [[META8]] = !{!"p1 int", [[META9:![0-9]+]], i64 0} +// CHECK: [[META9]] = !{!"any pointer", [[META10:![0-9]+]], i64 0} +// CHECK: [[META10]] = !{!"omnipotent char", [[META11:![0-9]+]], i64 0} +// CHECK: [[META11]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +// CHECK: [[META13]] = !{!"int", [[META10]], i64 0} +//. diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index dac3649..a3c3697 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -1911,7 +1911,6 @@ // CHECK_GNR_M32: #define __TSXLDTRK__ 1 // CHECK_GNR_M32: #define __UINTR__ 1 // CHECK_GNR_M32-NOT: #define __USERMSR__ 1 -// CHECK_DMR_M32: #define __USERMSR__ 1 // CHECK_GNR_M32: #define __VAES__ 1 // CHECK_GNR_M32: #define __VPCLMULQDQ__ 1 // CHECK_GNR_M32: #define __WAITPKG__ 1 @@ -2018,7 +2017,6 @@ // CHECK_GNR_M64: #define __TSXLDTRK__ 1 // CHECK_GNR_M64: #define __UINTR__ 1 // CHECK_GNR_M64-NOT: #define __USERMSR__ 1 -// CHECK_DMR_M64: #define __USERMSR__ 1 // CHECK_GNR_M64: #define __VAES__ 1 // CHECK_GNR_M64: #define __VPCLMULQDQ__ 1 // CHECK_GNR_M64: #define __WAITPKG__ 1 diff --git a/clang/test/SemaOpenACC/combined-construct-reduction-clause.cpp b/clang/test/SemaOpenACC/combined-construct-reduction-clause.cpp index 1b50336..c406644 100644 --- a/clang/test/SemaOpenACC/combined-construct-reduction-clause.cpp +++ b/clang/test/SemaOpenACC/combined-construct-reduction-clause.cpp @@ -31,10 +31,12 @@ void uses(unsigned Parm) { #pragma acc serial loop reduction(&: CoS, I, F) // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} - // expected-error@-2{{invalid operands to binary expression ('float' and 'float')}} - // expected-error@-3{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} + // expected-note@-2{{while forming binary operator '&='}} + // expected-error@-3{{invalid operands to binary expression ('float' and 'float')}} + // expected-error@-4{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-5{{invalid operands to binary expression ('float' and 'float')}} + // expected-note@-6{{while forming binary operator '&='}} + // expected-error@-7{{invalid operands to binary expression ('float' and 'float')}} for(int i = 0; i < 5; ++i); #pragma acc kernels loop reduction(min: CoS, Array[I], Array[0:I]) diff --git a/clang/test/SemaOpenACC/compute-construct-reduction-clause.c b/clang/test/SemaOpenACC/compute-construct-reduction-clause.c index 96c01d0..cb490ed 100644 --- a/clang/test/SemaOpenACC/compute-construct-reduction-clause.c +++ b/clang/test/SemaOpenACC/compute-construct-reduction-clause.c @@ -59,10 +59,12 @@ void uses(unsigned Parm) { // Vars in a reduction must be a scalar or a composite of scalars. #pragma acc parallel reduction(&: CoS, I, F) // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} - // expected-error@-2{{invalid operands to binary expression ('float' and 'float')}} - // expected-error@-3{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} + // expected-note@-2{{while forming binary operator '&='}} + // expected-error@-3{{invalid operands to binary expression ('float' and 'float')}} + // expected-error@-4{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-5{{invalid operands to binary expression ('float' and 'float')}} + // expected-note@-6{{while forming binary operator '&='}} + // expected-error@-7{{invalid operands to binary expression ('float' and 'float')}} while (1); // expected-error@+3{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} @@ -76,15 +78,17 @@ void uses(unsigned Parm) { #pragma acc parallel reduction(&: CoS, Array[I], Array[0:I]) // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-3{{invalid operands to binary expression ('float' and 'float')}} + // expected-note@-3{{while forming binary operator '&='}} + // expected-error@-4{{invalid operands to binary expression ('float' and 'float')}} while (1); struct CompositeHasComposite ChCArray[5]; - // expected-error@+6{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} + // expected-error@+7{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} - // expected-note@+4{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} - // expected-error@+3{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} + // expected-note@+5{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} + // expected-error@+4{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} + // expected-note@+2{{while forming binary operator '&='}} // expected-error@+1{{invalid operands to binary expression ('float' and 'float')}} #pragma acc parallel reduction(&: CoS, Array[I], ChCArray[0:I]) while (1); diff --git a/clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp b/clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp index e3a487a..70dc3d6d 100644 --- a/clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp +++ b/clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp @@ -62,10 +62,12 @@ void uses(unsigned Parm) { // Vars in a reduction must be a scalar or a composite of scalars. #pragma acc parallel reduction(&: CoS, I, F) // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} - // expected-error@-2{{invalid operands to binary expression ('float' and 'float')}} - // expected-error@-3{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} + // expected-note@-2{{while forming binary operator '&='}} + // expected-error@-3{{invalid operands to binary expression ('float' and 'float')}} + // expected-error@-4{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-5{{invalid operands to binary expression ('float' and 'float')}} + // expected-note@-6{{while forming binary operator '&='}} + // expected-error@-7{{invalid operands to binary expression ('float' and 'float')}} while (1); // expected-error@+3{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} @@ -78,7 +80,8 @@ void uses(unsigned Parm) { #pragma acc parallel reduction(&: CoS, Array[I], Array[0:I]) // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-3{{invalid operands to binary expression ('float' and 'float')}} + // expected-note@-3{{while forming binary operator '&='}} + // expected-error@-4{{invalid operands to binary expression ('float' and 'float')}} while (1); // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}} @@ -221,7 +224,8 @@ void TemplUses(T Parm, U CoS, V ChC) { #pragma acc parallel reduction(&: CoS, Var, Parm) // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-3{{invalid operands to binary expression ('float' and 'float')}} + // expected-note@-3{{while forming binary operator '&='}} + // expected-error@-4{{invalid operands to binary expression ('float' and 'float')}} while (1); // expected-error@+3{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} @@ -236,7 +240,8 @@ void TemplUses(T Parm, U CoS, V ChC) { #pragma acc parallel reduction(&: CoS, Array[Var], Array[0:Var]) // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-3{{invalid operands to binary expression ('float' and 'float')}} + // expected-note@-3{{while forming binary operator '&='}} + // expected-error@-4{{invalid operands to binary expression ('float' and 'float')}} while (1); // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}} diff --git a/clang/test/Unit/lit.cfg.py b/clang/test/Unit/lit.cfg.py index 37e91d0..ebe35a1 100644 --- a/clang/test/Unit/lit.cfg.py +++ b/clang/test/Unit/lit.cfg.py @@ -51,7 +51,7 @@ def find_shlibpath_var(): yield "LD_LIBRARY_PATH" elif platform.system() == "Darwin": yield "DYLD_LIBRARY_PATH" - elif platform.system() == "Windows": + elif platform.system() == "Windows" or sys.platform == "cygwin": yield "PATH" elif platform.system() == "AIX": yield "LIBPATH" diff --git a/clang/test/lit.cfg.py b/clang/test/lit.cfg.py index 52b275c..29088ef 100644 --- a/clang/test/lit.cfg.py +++ b/clang/test/lit.cfg.py @@ -18,22 +18,11 @@ from lit.llvm.subst import FindTool # name: The name of this test suite. config.name = "Clang" -# TODO: Consolidate the logic for turning on the internal shell by default for all LLVM test suites. -# See https://github.com/llvm/llvm-project/issues/106636 for more details. -# -# We prefer the lit internal shell which provides a better user experience on failures -# and is faster unless the user explicitly disables it with LIT_USE_INTERNAL_SHELL=0 -# env var. -use_lit_shell = True -lit_shell_env = os.environ.get("LIT_USE_INTERNAL_SHELL") -if lit_shell_env: - use_lit_shell = lit.util.pythonize_bool(lit_shell_env) - # testFormat: The test format to use to interpret tests. # # For now we require '&&' between commands, until they get globally killed and # the test runner updated. -config.test_format = lit.formats.ShTest(execute_external=not use_lit_shell) +config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell) # suffixes: A list of file extensions to treat as test files. config.suffixes = [ diff --git a/clang/unittests/Driver/MultilibBuilderTest.cpp b/clang/unittests/Driver/MultilibBuilderTest.cpp index 38c9344..0c1e806 100644 --- a/clang/unittests/Driver/MultilibBuilderTest.cpp +++ b/clang/unittests/Driver/MultilibBuilderTest.cpp @@ -91,7 +91,7 @@ TEST(MultilibBuilderTest, SetConstruction2) { ASSERT_TRUE(MS.size() == 4); for (MultilibSet::const_iterator I = MS.begin(), E = MS.end(); I != E; ++I) { ASSERT_TRUE(llvm::StringSwitch<bool>(I->gccSuffix()) - .Cases("", "/sof", "/el", "/sof/el", true) + .Cases({"", "/sof", "/el", "/sof/el"}, true) .Default(false)) << "Multilib " << *I << " wasn't expected"; ASSERT_TRUE(llvm::StringSwitch<bool>(I->gccSuffix()) diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index a3ad978..ce68f91 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -20777,6 +20777,30 @@ TEST_F(FormatTest, AlignWithLineBreaks) { "}", Style); // clang-format on + + Style = getLLVMStyleWithColumns(70); + Style.AlignConsecutiveDeclarations.Enabled = true; + verifyFormat( + "ReturnType\n" + "MyFancyIntefaceFunction(Context *context,\n" + " ALongTypeName *response) noexcept override;\n" + "ReturnType func();", + Style); + + verifyFormat( + "ReturnType\n" + "MyFancyIntefaceFunction(B<int> *context,\n" + " decltype(AFunc) *response) noexcept override;\n" + "ReturnType func();", + Style); + + Style.AlignConsecutiveAssignments.Enabled = true; + Style.ColumnLimit = 15; + verifyFormat("int i1 = 1;\n" + "k = bar(\n" + " argument1,\n" + " argument2);", + Style); } TEST_F(FormatTest, AlignWithInitializerPeriods) { diff --git a/clang/utils/TableGen/ClangOptionDocEmitter.cpp b/clang/utils/TableGen/ClangOptionDocEmitter.cpp index 8a31686..d779c84 100644 --- a/clang/utils/TableGen/ClangOptionDocEmitter.cpp +++ b/clang/utils/TableGen/ClangOptionDocEmitter.cpp @@ -173,11 +173,11 @@ Documentation extractDocumentation(const RecordKeeper &Records, // Get the first and successive separators to use for an OptionKind. std::pair<StringRef,StringRef> getSeparatorsForKind(const Record *OptionKind) { return StringSwitch<std::pair<StringRef, StringRef>>(OptionKind->getName()) - .Cases("KIND_JOINED", "KIND_JOINED_OR_SEPARATE", - "KIND_JOINED_AND_SEPARATE", - "KIND_REMAINING_ARGS_JOINED", {"", " "}) - .Case("KIND_COMMAJOINED", {"", ","}) - .Default({" ", " "}); + .Cases({"KIND_JOINED", "KIND_JOINED_OR_SEPARATE", + "KIND_JOINED_AND_SEPARATE", "KIND_REMAINING_ARGS_JOINED"}, + {"", " "}) + .Case("KIND_COMMAJOINED", {"", ","}) + .Default({" ", " "}); } const unsigned UnlimitedArgs = unsigned(-1); @@ -186,12 +186,13 @@ const unsigned UnlimitedArgs = unsigned(-1); // arguments are accepted. unsigned getNumArgsForKind(const Record *OptionKind, const Record *Option) { return StringSwitch<unsigned>(OptionKind->getName()) - .Cases("KIND_JOINED", "KIND_JOINED_OR_SEPARATE", "KIND_SEPARATE", 1) - .Cases("KIND_REMAINING_ARGS", "KIND_REMAINING_ARGS_JOINED", - "KIND_COMMAJOINED", UnlimitedArgs) - .Case("KIND_JOINED_AND_SEPARATE", 2) - .Case("KIND_MULTIARG", Option->getValueAsInt("NumArgs")) - .Default(0); + .Cases({"KIND_JOINED", "KIND_JOINED_OR_SEPARATE", "KIND_SEPARATE"}, 1) + .Cases({"KIND_REMAINING_ARGS", "KIND_REMAINING_ARGS_JOINED", + "KIND_COMMAJOINED"}, + UnlimitedArgs) + .Case("KIND_JOINED_AND_SEPARATE", 2) + .Case("KIND_MULTIARG", Option->getValueAsInt("NumArgs")) + .Default(0); } std::string escapeRST(StringRef Str) { diff --git a/compiler-rt/lib/builtins/gcc_personality_v0.c b/compiler-rt/lib/builtins/gcc_personality_v0.c index ef63a5f..3ed17fa 100644 --- a/compiler-rt/lib/builtins/gcc_personality_v0.c +++ b/compiler-rt/lib/builtins/gcc_personality_v0.c @@ -30,6 +30,54 @@ EXCEPTION_DISPOSITION _GCC_specific_handler(PEXCEPTION_RECORD, void *, PCONTEXT, _Unwind_Personality_Fn); #endif +#if __has_feature(ptrauth_calls) +#include <ptrauth.h> + +// `__ptrauth_restricted_intptr` is a feature of apple clang that predates +// support for direct application of `__ptrauth` to integer types. This +// guard is necessary to support compilation with those compiler. +#if __has_feature(ptrauth_restricted_intptr_qualifier) +#define __ptrauth_gcc_personality_intptr(key, addressDiscriminated, \ + discriminator) \ + __ptrauth_restricted_intptr(key, addressDiscriminated, discriminator) +#else +#define __ptrauth_gcc_personality_intptr(key, addressDiscriminated, \ + discriminator) \ + __ptrauth(key, addressDiscriminated, discriminator) +#endif +#else +#define __ptrauth_gcc_personality_intptr(...) +#endif + +#define __ptrauth_gcc_personality_func_key ptrauth_key_function_pointer + +// ptrauth_string_discriminator("__gcc_personality_v0'funcStart") == 0xDFEB +#define __ptrauth_gcc_personality_func_start \ + __ptrauth_gcc_personality_intptr(__ptrauth_gcc_personality_func_key, 1, \ + 0xDFEB) + +// ptrauth_string_discriminator("__gcc_personality_v0'start") == 0x52DC +#define __ptrauth_gcc_personality_start \ + __ptrauth_gcc_personality_intptr(__ptrauth_gcc_personality_func_key, 1, \ + 0x52DC) + +// ptrauth_string_discriminator("__gcc_personality_v0'length") == 0xFFF7 +#define __ptrauth_gcc_personality_length \ + __ptrauth_gcc_personality_intptr(__ptrauth_gcc_personality_func_key, 1, \ + 0xFFF7) + +// ptrauth_string_discriminator("__gcc_personality_v0'landingPadOffset") == +// 0x6498 +#define __ptrauth_gcc_personality_lpoffset \ + __ptrauth_gcc_personality_intptr(__ptrauth_gcc_personality_func_key, 1, \ + 0x6498) + +// ptrauth_string_discriminator("__gcc_personality_v0'landingPad") == 0xA134 +#define __ptrauth_gcc_personality_lpad_disc 0xA134 +#define __ptrauth_gcc_personality_lpad \ + __ptrauth_gcc_personality_intptr(__ptrauth_gcc_personality_func_key, 1, \ + __ptrauth_gcc_personality_lpad_disc) + // Pointer encodings documented at: // http://refspecs.freestandards.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html @@ -205,7 +253,8 @@ COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_v0( return continueUnwind(exceptionObject, context); uintptr_t pc = (uintptr_t)_Unwind_GetIP(context) - 1; - uintptr_t funcStart = (uintptr_t)_Unwind_GetRegionStart(context); + uintptr_t __ptrauth_gcc_personality_func_start funcStart = + (uintptr_t)_Unwind_GetRegionStart(context); uintptr_t pcOffset = pc - funcStart; // Parse LSDA header. @@ -224,11 +273,14 @@ COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_v0( const uint8_t *callSiteTableEnd = callSiteTableStart + callSiteTableLength; const uint8_t *p = callSiteTableStart; while (p < callSiteTableEnd) { - uintptr_t start = readEncodedPointer(&p, callSiteEncoding); - size_t length = readEncodedPointer(&p, callSiteEncoding); - size_t landingPad = readEncodedPointer(&p, callSiteEncoding); + uintptr_t __ptrauth_gcc_personality_start start = + readEncodedPointer(&p, callSiteEncoding); + size_t __ptrauth_gcc_personality_length length = + readEncodedPointer(&p, callSiteEncoding); + size_t __ptrauth_gcc_personality_lpoffset landingPadOffset = + readEncodedPointer(&p, callSiteEncoding); readULEB128(&p); // action value not used for C code - if (landingPad == 0) + if (landingPadOffset == 0) continue; // no landing pad for this entry if ((start <= pcOffset) && (pcOffset < (start + length))) { // Found landing pad for the PC. @@ -238,7 +290,24 @@ COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_v0( _Unwind_SetGR(context, __builtin_eh_return_data_regno(0), (uintptr_t)exceptionObject); _Unwind_SetGR(context, __builtin_eh_return_data_regno(1), 0); - _Unwind_SetIP(context, (funcStart + landingPad)); + size_t __ptrauth_gcc_personality_lpad landingPad = + funcStart + landingPadOffset; +#if __has_feature(ptrauth_calls) + uintptr_t stackPointer = _Unwind_GetGR(context, -2); + const uintptr_t existingDiscriminator = ptrauth_blend_discriminator( + &landingPad, __ptrauth_gcc_personality_lpad_disc); + // newIP is authenticated as if it were qualified with a pseudo qualifier + // along the lines of: + // __ptrauth(ptrauth_key_return_address, <stackPointer>, 0) + // where the stack pointer is used in place of the strict storage + // address. + uintptr_t newIP = (uintptr_t)ptrauth_auth_and_resign( + *(void **)&landingPad, __ptrauth_gcc_personality_func_key, + existingDiscriminator, ptrauth_key_return_address, stackPointer); + _Unwind_SetIP(context, newIP); +#else + _Unwind_SetIP(context, landingPad); +#endif return _URC_INSTALL_CONTEXT; } } diff --git a/flang/include/flang/Lower/OpenMP/Clauses.h b/flang/include/flang/Lower/OpenMP/Clauses.h index 273e611..7492466 100644 --- a/flang/include/flang/Lower/OpenMP/Clauses.h +++ b/flang/include/flang/Lower/OpenMP/Clauses.h @@ -214,6 +214,7 @@ using Depend = tomp::clause::DependT<TypeTy, IdTy, ExprTy>; using Destroy = tomp::clause::DestroyT<TypeTy, IdTy, ExprTy>; using Detach = tomp::clause::DetachT<TypeTy, IdTy, ExprTy>; using Device = tomp::clause::DeviceT<TypeTy, IdTy, ExprTy>; +using DeviceSafesync = tomp::clause::DeviceSafesyncT<TypeTy, IdTy, ExprTy>; using DeviceType = tomp::clause::DeviceTypeT<TypeTy, IdTy, ExprTy>; using DistSchedule = tomp::clause::DistScheduleT<TypeTy, IdTy, ExprTy>; using Doacross = tomp::clause::DoacrossT<TypeTy, IdTy, ExprTy>; diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 3501790..5677277 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -559,6 +559,7 @@ public: NODE(OmpDeviceClause, Modifier) NODE(parser, OmpDeviceModifier) NODE_ENUM(OmpDeviceModifier, Value) + NODE(parser, OmpDeviceSafesyncClause) NODE(parser, OmpDeviceTypeClause) NODE_ENUM(OmpDeviceTypeClause, DeviceTypeDescription) NODE(parser, OmpDirectiveName) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index bb3af9e..6dd4f249 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4418,6 +4418,14 @@ struct OmpDeviceClause { std::tuple<MODIFIERS(), ScalarIntExpr> t; }; +// Ref: [6.0:356-362] +// +// device-safesync-clause -> +// DEVICE_SAFESYNC [(scalar-logical-const-expr)] // since 6.0 +struct OmpDeviceSafesyncClause { + WRAPPER_CLASS_BOILERPLATE(OmpDeviceSafesyncClause, ScalarLogicalConstantExpr); +}; + // Ref: [5.0:180-185], [5.1:210-216], [5.2:275] // // device-type-clause -> diff --git a/flang/include/flang/Parser/tools.h b/flang/include/flang/Parser/tools.h index a90c856..d105f03 100644 --- a/flang/include/flang/Parser/tools.h +++ b/flang/include/flang/Parser/tools.h @@ -259,5 +259,7 @@ template <typename A> std::optional<CharBlock> GetLastSource(A &x) { // Checks whether the assignment statement has a single variable on the RHS. bool CheckForSingleVariableOnRHS(const AssignmentStmt &); +const Name *GetDesignatorNameIfDataRef(const Designator &); + } // namespace Fortran::parser #endif // FORTRAN_PARSER_TOOLS_H_ diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index b977fb8..8a7b986 100644 --- a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -739,12 +739,6 @@ const DerivedTypeSpec *GetDtvArgDerivedType(const Symbol &); void WarnOnDeferredLengthCharacterScalar(SemanticsContext &, const SomeExpr *, parser::CharBlock at, const char *what); -inline const parser::Name *getDesignatorNameIfDataRef( - const parser::Designator &designator) { - const auto *dataRef{std::get_if<parser::DataRef>(&designator.u)}; - return dataRef ? std::get_if<parser::Name>(&dataRef->u) : nullptr; -} - bool CouldBeDataPointerValuedFunction(const Symbol *); template <typename R, typename T> diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 0595ca0..3b711cc 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -58,6 +58,7 @@ #include "flang/Optimizer/Support/InternalNames.h" #include "flang/Optimizer/Transforms/Passes.h" #include "flang/Parser/parse-tree.h" +#include "flang/Parser/tools.h" #include "flang/Runtime/iostat-consts.h" #include "flang/Semantics/openmp-dsa.h" #include "flang/Semantics/runtime-type-info.h" @@ -3352,7 +3353,7 @@ private: &var.u)) { const Fortran::parser::Designator &designator = iDesignator->value(); if (const auto *name = - Fortran::semantics::getDesignatorNameIfDataRef(designator)) { + Fortran::parser::GetDesignatorNameIfDataRef(designator)) { auto val = getSymbolAddress(*name->symbol); reduceOperands.push_back(val); } diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index cfb1891..b3e8b69 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -30,10 +30,12 @@ #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Parser/parse-tree-visitor.h" #include "flang/Parser/parse-tree.h" +#include "flang/Parser/tools.h" #include "flang/Semantics/expression.h" #include "flang/Semantics/scope.h" #include "flang/Semantics/tools.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" +#include "mlir/Dialect/OpenACC/OpenACCUtils.h" #include "mlir/IR/IRMapping.h" #include "mlir/IR/MLIRContext.h" #include "mlir/Support/LLVM.h" @@ -296,7 +298,7 @@ getSymbolFromAccObject(const Fortran::parser::AccObject &accObject) { if (const auto *designator = std::get_if<Fortran::parser::Designator>(&accObject.u)) { if (const auto *name = - Fortran::semantics::getDesignatorNameIfDataRef(*designator)) + Fortran::parser::GetDesignatorNameIfDataRef(*designator)) return *name->symbol; if (const auto *arrayElement = Fortran::parser::Unwrap<Fortran::parser::ArrayElement>( @@ -327,7 +329,8 @@ genAtomicCaptureStatement(Fortran::lower::AbstractConverter &converter, fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::acc::AtomicReadOp::create(firOpBuilder, loc, fromAddress, toAddress, - mlir::TypeAttr::get(elementType)); + mlir::TypeAttr::get(elementType), + /*ifCond=*/mlir::Value{}); } /// Used to generate atomic.write operation which is created in existing @@ -347,7 +350,8 @@ genAtomicWriteStatement(Fortran::lower::AbstractConverter &converter, rhsExpr = firOpBuilder.createConvert(loc, varType, rhsExpr); firOpBuilder.restoreInsertionPoint(insertionPoint); - mlir::acc::AtomicWriteOp::create(firOpBuilder, loc, lhsAddr, rhsExpr); + mlir::acc::AtomicWriteOp::create(firOpBuilder, loc, lhsAddr, rhsExpr, + /*ifCond=*/mlir::Value{}); } /// Used to generate atomic.update operation which is created in existing @@ -463,7 +467,8 @@ static inline void genAtomicUpdateStatement( mlir::Operation *atomicUpdateOp = nullptr; atomicUpdateOp = - mlir::acc::AtomicUpdateOp::create(firOpBuilder, currentLocation, lhsAddr); + mlir::acc::AtomicUpdateOp::create(firOpBuilder, currentLocation, lhsAddr, + /*ifCond=*/mlir::Value{}); llvm::SmallVector<mlir::Type> varTys = {varType}; llvm::SmallVector<mlir::Location> locs = {currentLocation}; @@ -588,7 +593,9 @@ void genAtomicCapture(Fortran::lower::AbstractConverter &converter, fir::getBase(converter.genExprValue(assign2.lhs, stmtCtx)).getType(); mlir::Operation *atomicCaptureOp = nullptr; - atomicCaptureOp = mlir::acc::AtomicCaptureOp::create(firOpBuilder, loc); + atomicCaptureOp = + mlir::acc::AtomicCaptureOp::create(firOpBuilder, loc, + /*ifCond=*/mlir::Value{}); firOpBuilder.createBlock(&(atomicCaptureOp->getRegion(0))); mlir::Block &block = atomicCaptureOp->getRegion(0).back(); @@ -2712,12 +2719,19 @@ genACC(Fortran::lower::AbstractConverter &converter, const auto &loopDirective = std::get<Fortran::parser::AccLoopDirective>(beginLoopDirective.t); + mlir::Location currentLocation = + converter.genLocation(beginLoopDirective.source); bool needEarlyExitHandling = false; - if (eval.lowerAsUnstructured()) + if (eval.lowerAsUnstructured()) { needEarlyExitHandling = hasEarlyReturn(eval); + // If the loop is lowered in an unstructured fashion, lowering generates + // explicit control flow that duplicates the looping semantics of the + // loops. + if (!needEarlyExitHandling) + TODO(currentLocation, + "loop with early exit inside OpenACC loop construct"); + } - mlir::Location currentLocation = - converter.genLocation(beginLoopDirective.source); Fortran::lower::StatementContext stmtCtx; assert(loopDirective.v == llvm::acc::ACCD_loop && @@ -2900,7 +2914,7 @@ static Op createComputeOp( if (const auto *designator = std::get_if<Fortran::parser::Designator>(&accObject.u)) { if (const auto *name = - Fortran::semantics::getDesignatorNameIfDataRef( + Fortran::parser::GetDesignatorNameIfDataRef( *designator)) { auto cond = converter.getSymbolAddress(*name->symbol); selfCond = builder.createConvert(clauseLocation, @@ -3516,6 +3530,10 @@ genACC(Fortran::lower::AbstractConverter &converter, converter.genLocation(beginCombinedDirective.source); Fortran::lower::StatementContext stmtCtx; + if (eval.lowerAsUnstructured()) + TODO(currentLocation, + "loop with early exit inside OpenACC combined construct"); + if (combinedDirective.v == llvm::acc::ACCD_kernels_loop) { createComputeOp<mlir::acc::KernelsOp>( converter, currentLocation, eval, semanticsContext, stmtCtx, @@ -4261,8 +4279,7 @@ static void genGlobalCtors(Fortran::lower::AbstractConverter &converter, Fortran::common::visitors{ [&](const Fortran::parser::Designator &designator) { if (const auto *name = - Fortran::semantics::getDesignatorNameIfDataRef( - designator)) { + Fortran::parser::GetDesignatorNameIfDataRef(designator)) { genCtors(operandLocation, *name->symbol); } }, diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index ba34212..2a4ebf1 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -740,6 +740,18 @@ Device make(const parser::OmpClause::Device &inp, /*DeviceDescription=*/makeExpr(t1, semaCtx)}}; } +DeviceSafesync make(const parser::OmpClause::DeviceSafesync &inp, + semantics::SemanticsContext &semaCtx) { + // inp.v -> std::optional<parser::OmpDeviceSafesyncClause> + auto &&maybeRequired = maybeApply( + [&](const parser::OmpDeviceSafesyncClause &c) { + return makeExpr(c.v, semaCtx); + }, + inp.v); + + return DeviceSafesync{/*Required=*/std::move(maybeRequired)}; +} + DeviceType make(const parser::OmpClause::DeviceType &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpDeviceTypeClause diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index f86ee01..9495ea6 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -33,6 +33,7 @@ #include "flang/Parser/characters.h" #include "flang/Parser/openmp-utils.h" #include "flang/Parser/parse-tree.h" +#include "flang/Parser/tools.h" #include "flang/Semantics/openmp-directive-sets.h" #include "flang/Semantics/openmp-utils.h" #include "flang/Semantics/tools.h" @@ -3884,7 +3885,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, assert(object && "Expecting object as argument"); auto *designator = semantics::omp::GetDesignatorFromObj(*object); assert(designator && "Expecting desginator in argument"); - auto *name = semantics::getDesignatorNameIfDataRef(*designator); + auto *name = parser::GetDesignatorNameIfDataRef(*designator); assert(name && "Expecting dataref in designator"); critName = *name; } diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index d677e14..56fcac3 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -1159,6 +1159,9 @@ TYPE_PARSER( // construct<OmpDestroyClause>(Parser<OmpObject>{}))))) || "DEVICE" >> construct<OmpClause>(construct<OmpClause::Device>( parenthesized(Parser<OmpDeviceClause>{}))) || + "DEVICE_SAFESYNC" >> + construct<OmpClause>(construct<OmpClause::DeviceSafesync>( + maybe(parenthesized(scalarLogicalConstantExpr)))) || "DEVICE_TYPE" >> construct<OmpClause>(construct<OmpClause::DeviceType>( parenthesized(Parser<OmpDeviceTypeClause>{}))) || "DIST_SCHEDULE" >> diff --git a/flang/lib/Parser/tools.cpp b/flang/lib/Parser/tools.cpp index 264ca52..ed6d194 100644 --- a/flang/lib/Parser/tools.cpp +++ b/flang/lib/Parser/tools.cpp @@ -179,4 +179,9 @@ bool CheckForSingleVariableOnRHS(const AssignmentStmt &assignmentStmt) { return Unwrap<Designator>(std::get<Expr>(assignmentStmt.t)) != nullptr; } +const Name *GetDesignatorNameIfDataRef(const Designator &designator) { + const auto *dataRef{std::get_if<DataRef>(&designator.u)}; + return dataRef ? std::get_if<Name>(&dataRef->u) : nullptr; +} + } // namespace Fortran::parser diff --git a/flang/lib/Semantics/check-acc-structure.cpp b/flang/lib/Semantics/check-acc-structure.cpp index 3cd6d6b..5e87b83 100644 --- a/flang/lib/Semantics/check-acc-structure.cpp +++ b/flang/lib/Semantics/check-acc-structure.cpp @@ -10,6 +10,7 @@ #include "flang/Common/enum-set.h" #include "flang/Evaluate/tools.h" #include "flang/Parser/parse-tree.h" +#include "flang/Parser/tools.h" #include "flang/Semantics/symbol.h" #include "flang/Semantics/tools.h" #include "flang/Semantics/type.h" @@ -709,7 +710,8 @@ void AccStructureChecker::CheckMultipleOccurrenceInDeclare( common::visit( common::visitors{ [&](const parser::Designator &designator) { - if (const auto *name = getDesignatorNameIfDataRef(designator)) { + if (const auto *name = + parser::GetDesignatorNameIfDataRef(designator)) { if (declareSymbols.contains(&name->symbol->GetUltimate())) { if (declareSymbols[&name->symbol->GetUltimate()] == clause) { context_.Warn(common::UsageWarning::OpenAccUsage, @@ -982,7 +984,8 @@ void AccStructureChecker::Enter(const parser::AccClause::Reduction &reduction) { common::visit( common::visitors{ [&](const parser::Designator &designator) { - if (const auto *name = getDesignatorNameIfDataRef(designator)) { + if (const auto *name = + parser::GetDesignatorNameIfDataRef(designator)) { if (name->symbol) { if (const auto *type{name->symbol->GetType()}) { if (type->IsNumeric(TypeCategory::Integer) && diff --git a/flang/lib/Semantics/check-omp-loop.cpp b/flang/lib/Semantics/check-omp-loop.cpp index c9d0495..aaaa2d6 100644 --- a/flang/lib/Semantics/check-omp-loop.cpp +++ b/flang/lib/Semantics/check-omp-loop.cpp @@ -127,24 +127,23 @@ using namespace Fortran::semantics::omp; void OmpStructureChecker::HasInvalidDistributeNesting( const parser::OpenMPLoopConstruct &x) { - bool violation{false}; const parser::OmpDirectiveName &beginName{x.BeginDir().DirName()}; if (llvm::omp::topDistributeSet.test(beginName.v)) { // `distribute` region has to be nested - if (!CurrentDirectiveIsNested()) { - violation = true; - } else { + if (CurrentDirectiveIsNested()) { // `distribute` region has to be strictly nested inside `teams` if (!llvm::omp::bottomTeamsSet.test(GetContextParent().directive)) { - violation = true; + context_.Say(beginName.source, + "`DISTRIBUTE` region has to be strictly nested inside `TEAMS` " + "region."_err_en_US); } + } else { + // If not lexically nested (orphaned), issue a warning. + context_.Say(beginName.source, + "`DISTRIBUTE` must be dynamically enclosed in a `TEAMS` " + "region."_warn_en_US); } } - if (violation) { - context_.Say(beginName.source, - "`DISTRIBUTE` region has to be strictly nested inside `TEAMS` " - "region."_err_en_US); - } } void OmpStructureChecker::HasInvalidLoopBinding( const parser::OpenMPLoopConstruct &x) { @@ -486,8 +485,8 @@ void OmpStructureChecker::Leave(const parser::OpenMPLoopConstruct &x) { common::visit( common::visitors{ [&](const parser::Designator &designator) { - if (const auto *name{semantics::getDesignatorNameIfDataRef( - designator)}) { + if (const auto *name{ + parser::GetDesignatorNameIfDataRef(designator)}) { checkReductionSymbolInScan(name); } }, diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index ea6fe43..e2f8796 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -1535,6 +1535,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPRequiresConstruct &x) { [&](auto &&s) { using TypeS = llvm::remove_cvref_t<decltype(s)>; if constexpr ( // + std::is_same_v<TypeS, parser::OmpClause::DeviceSafesync> || std::is_same_v<TypeS, parser::OmpClause::DynamicAllocators> || std::is_same_v<TypeS, parser::OmpClause::ReverseOffload> || std::is_same_v<TypeS, parser::OmpClause::SelfMaps> || @@ -2616,7 +2617,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPCriticalConstruct &x) { auto getNameFromArg{[](const parser::OmpArgument &arg) { if (auto *object{parser::Unwrap<parser::OmpObject>(arg.u)}) { if (auto *designator{omp::GetDesignatorFromObj(*object)}) { - return getDesignatorNameIfDataRef(*designator); + return parser::GetDesignatorNameIfDataRef(*designator); } } return static_cast<const parser::Name *>(nullptr); @@ -5194,6 +5195,10 @@ void OmpStructureChecker::Enter( CheckAllowedRequiresClause(llvm::omp::Clause::OMPC_atomic_default_mem_order); } +void OmpStructureChecker::Enter(const parser::OmpClause::DeviceSafesync &x) { + CheckAllowedRequiresClause(llvm::omp::Clause::OMPC_device_safesync); +} + void OmpStructureChecker::Enter(const parser::OmpClause::DynamicAllocators &x) { CheckAllowedRequiresClause(llvm::omp::Clause::OMPC_dynamic_allocators); } diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 7067ed3..3bb586c 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -586,6 +586,7 @@ public: [&](auto &&s) { using TypeS = llvm::remove_cvref_t<decltype(s)>; if constexpr ( // + std::is_same_v<TypeS, OmpClause::DeviceSafesync> || std::is_same_v<TypeS, OmpClause::DynamicAllocators> || std::is_same_v<TypeS, OmpClause::ReverseOffload> || std::is_same_v<TypeS, OmpClause::SelfMaps> || @@ -892,7 +893,7 @@ public: common::visitors{ [&](const parser::Designator &designator) { if (const auto *name{ - semantics::getDesignatorNameIfDataRef(designator)}) { + parser::GetDesignatorNameIfDataRef(designator)}) { if (name->symbol) { name->symbol->set( ompFlag.value_or(Symbol::Flag::OmpMapStorage)); @@ -1758,7 +1759,7 @@ void AccAttributeVisitor::ResolveAccObject( common::visitors{ [&](const parser::Designator &designator) { if (const auto *name{ - semantics::getDesignatorNameIfDataRef(designator)}) { + parser::GetDesignatorNameIfDataRef(designator)}) { if (auto *symbol{ResolveAcc(*name, accFlag, currScope())}) { AddToContextObjectWithDSA(*symbol, accFlag); if (dataSharingAttributeFlags.test(accFlag)) { @@ -3064,7 +3065,7 @@ void OmpAttributeVisitor::ResolveOmpDesignator( unsigned version{context_.langOptions().OpenMPVersion}; llvm::omp::Directive directive{GetContext().directive}; - const auto *name{semantics::getDesignatorNameIfDataRef(designator)}; + const auto *name{parser::GetDesignatorNameIfDataRef(designator)}; if (!name) { // Array sections to be changed to substrings as needed if (AnalyzeExpr(context_, designator)) { diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 699de41..0af1c94 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -1494,7 +1494,7 @@ bool AccVisitor::Pre(const parser::AccClause::UseDevice &x) { common::visitors{ [&](const parser::Designator &designator) { if (const auto *name{ - semantics::getDesignatorNameIfDataRef(designator)}) { + parser::GetDesignatorNameIfDataRef(designator)}) { Symbol *prev{currScope().FindSymbol(name->source)}; if (prev != name->symbol) { name->symbol = prev; @@ -1648,7 +1648,7 @@ public: common::visitors{ [&](const parser::Designator &designator) { if (const auto *name{ - semantics::getDesignatorNameIfDataRef(designator)}) { + parser::GetDesignatorNameIfDataRef(designator)}) { specPartState_.declareTargetNames.insert(name->source); } }, @@ -2016,7 +2016,7 @@ void OmpVisitor::ResolveCriticalName(const parser::OmpArgument &arg) { if (auto *object{parser::Unwrap<parser::OmpObject>(arg.u)}) { if (auto *desg{omp::GetDesignatorFromObj(*object)}) { - if (auto *name{getDesignatorNameIfDataRef(*desg)}) { + if (auto *name{parser::GetDesignatorNameIfDataRef(*desg)}) { if (auto *symbol{FindInScope(globalScope, *name)}) { if (!symbol->test(Symbol::Flag::OmpCriticalLock)) { SayWithDecl(*name, *symbol, diff --git a/flang/test/Lower/OpenACC/acc-unstructured.f90 b/flang/test/Lower/OpenACC/acc-unstructured.f90 index c57af98..c42c7dd 100644 --- a/flang/test/Lower/OpenACC/acc-unstructured.f90 +++ b/flang/test/Lower/OpenACC/acc-unstructured.f90 @@ -1,4 +1,5 @@ ! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s +! XFAIL: * subroutine test_unstructured1(a, b, c) integer :: i, j, k diff --git a/flang/test/Parser/OpenMP/requires.f90 b/flang/test/Parser/OpenMP/requires.f90 index 8169403..ab4f437 100644 --- a/flang/test/Parser/OpenMP/requires.f90 +++ b/flang/test/Parser/OpenMP/requires.f90 @@ -1,5 +1,5 @@ -!RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=50 %s | FileCheck --ignore-case --check-prefix="UNPARSE" %s -!RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=50 %s | FileCheck --check-prefix="PARSE-TREE" %s +!RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=60 %s | FileCheck --ignore-case --check-prefix="UNPARSE" %s +!RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=60 %s | FileCheck --check-prefix="PARSE-TREE" %s !$omp requires atomic_default_mem_order(seq_cst) @@ -44,4 +44,13 @@ !PARSE-TREE: | | | bool = 'false' !PARSE-TREE: | Flags = None +!$omp requires device_safesync + +!UNPARSE: !$OMP REQUIRES DEVICE_SAFESYNC + +!PARSE-TREE: OpenMPDeclarativeConstruct -> OpenMPRequiresConstruct -> OmpDirectiveSpecification +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = requires +!PARSE-TREE: | OmpClauseList -> OmpClause -> DeviceSafesync +!PARSE-TREE: | Flags = None + end diff --git a/flang/test/Semantics/OpenMP/combined-constructs.f90 b/flang/test/Semantics/OpenMP/combined-constructs.f90 index 49da562..3e9c654 100644 --- a/flang/test/Semantics/OpenMP/combined-constructs.f90 +++ b/flang/test/Semantics/OpenMP/combined-constructs.f90 @@ -7,7 +7,7 @@ program main real(8) :: a(256), b(256) N = 256 - !ERROR: `DISTRIBUTE` region has to be strictly nested inside `TEAMS` region. + !WARNING: `DISTRIBUTE` must be dynamically enclosed in a `TEAMS` region. !$omp distribute simd do i = 1, N a(i) = 3.14d0 diff --git a/flang/test/Semantics/OpenMP/do05.f90 b/flang/test/Semantics/OpenMP/do05.f90 index 24844f9..d832052 100644 --- a/flang/test/Semantics/OpenMP/do05.f90 +++ b/flang/test/Semantics/OpenMP/do05.f90 @@ -49,7 +49,7 @@ program omp_do end do !$omp end parallel do simd - !ERROR: `DISTRIBUTE` region has to be strictly nested inside `TEAMS` region. + !WARNING: `DISTRIBUTE` must be dynamically enclosed in a `TEAMS` region. !$omp distribute parallel do do i=1,10 if( i == 3 ) then @@ -64,7 +64,7 @@ program omp_do end do !$omp end distribute parallel do - !ERROR: `DISTRIBUTE` region has to be strictly nested inside `TEAMS` region. + !WARNING: `DISTRIBUTE` must be dynamically enclosed in a `TEAMS` region. !$omp distribute parallel do simd do i=1,10 if( i == 3 ) then diff --git a/flang/test/Semantics/OpenMP/linear-iter.f90 b/flang/test/Semantics/OpenMP/linear-iter.f90 index 1f40228..5c9f2d5 100644 --- a/flang/test/Semantics/OpenMP/linear-iter.f90 +++ b/flang/test/Semantics/OpenMP/linear-iter.f90 @@ -53,7 +53,7 @@ SUBROUTINE LINEAR_BAD(N) !$omp end teams !$omp end target - !ERROR: `DISTRIBUTE` region has to be strictly nested inside `TEAMS` region. + !WARNING: `DISTRIBUTE` must be dynamically enclosed in a `TEAMS` region. !ERROR: Variable 'j' not allowed in LINEAR clause, only loop iterator can be specified in LINEAR clause of a construct combined with DISTRIBUTE !$omp distribute simd linear(i,j) do i = 1, N @@ -63,7 +63,7 @@ SUBROUTINE LINEAR_BAD(N) enddo !$omp end distribute simd - !ERROR: `DISTRIBUTE` region has to be strictly nested inside `TEAMS` region. + !WARNING: `DISTRIBUTE` must be dynamically enclosed in a `TEAMS` region. !ERROR: Variable 'j' not allowed in LINEAR clause, only loop iterator can be specified in LINEAR clause of a construct combined with DISTRIBUTE !$omp distribute simd linear(i,j) collapse(1) do i = 1, N @@ -73,7 +73,7 @@ SUBROUTINE LINEAR_BAD(N) enddo !$omp end distribute simd - !ERROR: `DISTRIBUTE` region has to be strictly nested inside `TEAMS` region. + !WARNING: `DISTRIBUTE` must be dynamically enclosed in a `TEAMS` region. !$omp distribute simd linear(i,j) collapse(2) do i = 1, N do j = 1, N diff --git a/flang/test/Semantics/OpenMP/nested-distribute.f90 b/flang/test/Semantics/OpenMP/nested-distribute.f90 index cb4aea3..4134ced 100644 --- a/flang/test/Semantics/OpenMP/nested-distribute.f90 +++ b/flang/test/Semantics/OpenMP/nested-distribute.f90 @@ -1,6 +1,15 @@ ! RUN: %python %S/../test_errors.py %s %flang -fopenmp ! Check OpenMP clause validity for the following directives: ! 2.10 Device constructs + +subroutine f + integer :: i + !WARNING: `DISTRIBUTE` must be dynamically enclosed in a `TEAMS` region. + !$omp distribute + do i = 1, 100 + print *, "hello" + end do +end subroutine program main real(8) :: arrayA(256), arrayB(256) @@ -108,4 +117,8 @@ program main end do !$omp end distribute !$omp end task + + !$omp teams + call foo + !$omp end teams end program main diff --git a/libcxxabi/include/__cxxabi_config.h b/libcxxabi/include/__cxxabi_config.h index 759445d..f5101db 100644 --- a/libcxxabi/include/__cxxabi_config.h +++ b/libcxxabi/include/__cxxabi_config.h @@ -103,6 +103,47 @@ #define _LIBCXXABI_DTOR_FUNC #endif +#if __has_include(<ptrauth.h>) +# include <ptrauth.h> +#endif + +#if __has_feature(ptrauth_calls) + +// ptrauth_string_discriminator("__cxa_exception::actionRecord") == 0xFC91 +# define __ptrauth_cxxabi_action_record __ptrauth(ptrauth_key_process_dependent_data, 1, 0xFC91) + +// ptrauth_string_discriminator("__cxa_exception::languageSpecificData") == 0xE8EE +# define __ptrauth_cxxabi_lsd __ptrauth(ptrauth_key_process_dependent_data, 1, 0xE8EE) + +// ptrauth_string_discriminator("__cxa_exception::catchTemp") == 0xFA58 +# define __ptrauth_cxxabi_catch_temp_disc 0xFA58 +# define __ptrauth_cxxabi_catch_temp_key ptrauth_key_process_dependent_data +# define __ptrauth_cxxabi_catch_temp __ptrauth(__ptrauth_cxxabi_catch_temp_key, 1, __ptrauth_cxxabi_catch_temp_disc) + +// ptrauth_string_discriminator("__cxa_exception::adjustedPtr") == 0x99E4 +# define __ptrauth_cxxabi_adjusted_ptr __ptrauth(ptrauth_key_process_dependent_data, 1, 0x99E4) + +// ptrauth_string_discriminator("__cxa_exception::unexpectedHandler") == 0x99A9 +# define __ptrauth_cxxabi_unexpected_handler __ptrauth(ptrauth_key_function_pointer, 1, 0x99A9) + +// ptrauth_string_discriminator("__cxa_exception::terminateHandler") == 0x0886) +# define __ptrauth_cxxabi_terminate_handler __ptrauth(ptrauth_key_function_pointer, 1, 0x886) + +// ptrauth_string_discriminator("__cxa_exception::exceptionDestructor") == 0xC088 +# define __ptrauth_cxxabi_exception_destructor __ptrauth(ptrauth_key_function_pointer, 1, 0xC088) + +#else + +# define __ptrauth_cxxabi_action_record +# define __ptrauth_cxxabi_lsd +# define __ptrauth_cxxabi_catch_temp +# define __ptrauth_cxxabi_adjusted_ptr +# define __ptrauth_cxxabi_unexpected_handler +# define __ptrauth_cxxabi_terminate_handler +# define __ptrauth_cxxabi_exception_destructor + +#endif + #if __cplusplus < 201103L # define _LIBCXXABI_NOEXCEPT throw() #else diff --git a/libcxxabi/src/cxa_exception.cpp b/libcxxabi/src/cxa_exception.cpp index 92901a8..73ba21c 100644 --- a/libcxxabi/src/cxa_exception.cpp +++ b/libcxxabi/src/cxa_exception.cpp @@ -192,7 +192,9 @@ void *__cxa_allocate_exception(size_t thrown_size) throw() { std::terminate(); __cxa_exception *exception_header = static_cast<__cxa_exception *>((void *)(raw_buffer + header_offset)); - ::memset(exception_header, 0, actual_size); + // We warn on memset to a non-trivially castable type. We might want to + // change that diagnostic to not fire on a trivially obvious zero fill. + ::memset(static_cast<void*>(exception_header), 0, actual_size); return thrown_object_from_cxa_exception(exception_header); } diff --git a/libcxxabi/src/cxa_exception.h b/libcxxabi/src/cxa_exception.h index aba08f2..fa4c4dc 100644 --- a/libcxxabi/src/cxa_exception.h +++ b/libcxxabi/src/cxa_exception.h @@ -47,10 +47,10 @@ struct _LIBCXXABI_HIDDEN __cxa_exception { // In Wasm, a destructor returns its argument void *(_LIBCXXABI_DTOR_FUNC *exceptionDestructor)(void *); #else - void (_LIBCXXABI_DTOR_FUNC *exceptionDestructor)(void *); + void (_LIBCXXABI_DTOR_FUNC *__ptrauth_cxxabi_exception_destructor exceptionDestructor)(void *); #endif - std::unexpected_handler unexpectedHandler; - std::terminate_handler terminateHandler; + std::unexpected_handler __ptrauth_cxxabi_unexpected_handler unexpectedHandler; + std::terminate_handler __ptrauth_cxxabi_terminate_handler terminateHandler; __cxa_exception *nextException; @@ -61,10 +61,10 @@ struct _LIBCXXABI_HIDDEN __cxa_exception { int propagationCount; #else int handlerSwitchValue; - const unsigned char *actionRecord; - const unsigned char *languageSpecificData; - void *catchTemp; - void *adjustedPtr; + const unsigned char *__ptrauth_cxxabi_action_record actionRecord; + const unsigned char *__ptrauth_cxxabi_lsd languageSpecificData; + void *__ptrauth_cxxabi_catch_temp catchTemp; + void *__ptrauth_cxxabi_adjusted_ptr adjustedPtr; #endif #if !defined(__LP64__) && !defined(_WIN64) && !defined(_LIBCXXABI_ARM_EHABI) @@ -79,6 +79,8 @@ struct _LIBCXXABI_HIDDEN __cxa_exception { // http://sourcery.mentor.com/archives/cxx-abi-dev/msg01924.html // The layout of this structure MUST match the layout of __cxa_exception, with // primaryException instead of referenceCount. +// The pointer authentication schemas specified here must also match those of +// the corresponding members in __cxa_exception. struct _LIBCXXABI_HIDDEN __cxa_dependent_exception { #if defined(__LP64__) || defined(_WIN64) || defined(_LIBCXXABI_ARM_EHABI) void* reserve; // padding. @@ -86,9 +88,9 @@ struct _LIBCXXABI_HIDDEN __cxa_dependent_exception { #endif std::type_info *exceptionType; - void (_LIBCXXABI_DTOR_FUNC *exceptionDestructor)(void *); - std::unexpected_handler unexpectedHandler; - std::terminate_handler terminateHandler; + void (_LIBCXXABI_DTOR_FUNC *__ptrauth_cxxabi_exception_destructor exceptionDestructor)(void *); + std::unexpected_handler __ptrauth_cxxabi_unexpected_handler unexpectedHandler; + std::terminate_handler __ptrauth_cxxabi_terminate_handler terminateHandler; __cxa_exception *nextException; @@ -99,10 +101,10 @@ struct _LIBCXXABI_HIDDEN __cxa_dependent_exception { int propagationCount; #else int handlerSwitchValue; - const unsigned char *actionRecord; - const unsigned char *languageSpecificData; - void * catchTemp; - void *adjustedPtr; + const unsigned char *__ptrauth_cxxabi_action_record actionRecord; + const unsigned char *__ptrauth_cxxabi_lsd languageSpecificData; + void *__ptrauth_cxxabi_catch_temp catchTemp; + void *__ptrauth_cxxabi_adjusted_ptr adjustedPtr; #endif #if !defined(__LP64__) && !defined(_WIN64) && !defined(_LIBCXXABI_ARM_EHABI) diff --git a/libcxxabi/src/cxa_personality.cpp b/libcxxabi/src/cxa_personality.cpp index 5f6e75c..b7eb0f2 100644 --- a/libcxxabi/src/cxa_personality.cpp +++ b/libcxxabi/src/cxa_personality.cpp @@ -20,7 +20,47 @@ #include "cxa_exception.h" #include "cxa_handlers.h" #include "private_typeinfo.h" -#include "unwind.h" + +#if __has_feature(ptrauth_calls) + +// CXXABI depends on defintions in libunwind as pointer auth couples the +// definitions +# include "libunwind.h" + +// The actual value of the discriminators listed below is not important. +// The derivation of the constants is only being included for the purpose +// of maintaining a record of how they were originally produced. + +// ptrauth_string_discriminator("scan_results::languageSpecificData") == 0xE50D) +# define __ptrauth_scan_results_lsd __ptrauth(ptrauth_key_process_dependent_code, 1, 0xE50D) + +// ptrauth_string_discriminator("scan_results::actionRecord") == 0x9823 +# define __ptrauth_scan_results_action_record __ptrauth(ptrauth_key_process_dependent_code, 1, 0x9823) + +// scan result is broken up as we have a manual re-sign that requires each component +# define __ptrauth_scan_results_landingpad_key ptrauth_key_process_dependent_code +// ptrauth_string_discriminator("scan_results::landingPad") == 0xD27C +# define __ptrauth_scan_results_landingpad_disc 0xD27C +# define __ptrauth_scan_results_landingpad \ + __ptrauth(__ptrauth_scan_results_landingpad_key, 1, __ptrauth_scan_results_landingpad_disc) + +// `__ptrauth_restricted_intptr` is a feature of apple clang that predates +// support for direct application of `__ptrauth` to integer types. This +// guard is necessary to support compilation with those compiler. +# if __has_extension(ptrauth_restricted_intptr_qualifier) +# define __ptrauth_scan_results_landingpad_intptr \ + __ptrauth_restricted_intptr(__ptrauth_scan_results_landingpad_key, 1, __ptrauth_scan_results_landingpad_disc) +# else +# define __ptrauth_scan_results_landingpad_intptr \ + __ptrauth(__ptrauth_scan_results_landingpad_key, 1, __ptrauth_scan_results_landingpad_disc) +# endif + +#else +# define __ptrauth_scan_results_lsd +# define __ptrauth_scan_results_action_record +# define __ptrauth_scan_results_landingpad +# define __ptrauth_scan_results_landingpad_intptr +#endif // TODO: This is a temporary workaround for libc++abi to recognize that it's being // built against LLVM's libunwind. LLVM's libunwind started reporting _LIBUNWIND_VERSION @@ -527,12 +567,17 @@ get_thrown_object_ptr(_Unwind_Exception* unwind_exception) namespace { +typedef const uint8_t *__ptrauth_scan_results_lsd lsd_ptr_t; +typedef const uint8_t *__ptrauth_scan_results_action_record action_ptr_t; +typedef uintptr_t __ptrauth_scan_results_landingpad_intptr landing_pad_t; +typedef void *__ptrauth_scan_results_landingpad landing_pad_ptr_t; + struct scan_results { int64_t ttypeIndex; // > 0 catch handler, < 0 exception spec handler, == 0 a cleanup - const uint8_t* actionRecord; // Currently unused. Retained to ease future maintenance. - const uint8_t* languageSpecificData; // Needed only for __cxa_call_unexpected - uintptr_t landingPad; // null -> nothing found, else something found + action_ptr_t actionRecord; // Currently unused. Retained to ease future maintenance. + lsd_ptr_t languageSpecificData; // Needed only for __cxa_call_unexpected + landing_pad_t landingPad; // null -> nothing found, else something found void* adjustedPtr; // Used in cxa_exception.cpp _Unwind_Reason_Code reason; // One of _URC_FATAL_PHASE1_ERROR, // _URC_FATAL_PHASE2_ERROR, @@ -557,7 +602,23 @@ set_registers(_Unwind_Exception* unwind_exception, _Unwind_Context* context, reinterpret_cast<uintptr_t>(unwind_exception)); _Unwind_SetGR(context, __builtin_eh_return_data_regno(1), static_cast<uintptr_t>(results.ttypeIndex)); +#if __has_feature(ptrauth_calls) + auto stackPointer = _Unwind_GetGR(context, UNW_REG_SP); + // We manually re-sign the IP as the __ptrauth qualifiers cannot + // express the required relationship with the destination address + const auto existingDiscriminator = + ptrauth_blend_discriminator(&results.landingPad, + __ptrauth_scan_results_landingpad_disc); + unw_word_t newIP /* opaque __ptrauth(ptrauth_key_return_address, stackPointer, 0) */ = + (unw_word_t)ptrauth_auth_and_resign(*(void* const*)&results.landingPad, + __ptrauth_scan_results_landingpad_key, + existingDiscriminator, + ptrauth_key_return_address, + stackPointer); + _Unwind_SetIP(context, newIP); +#else _Unwind_SetIP(context, results.landingPad); +#endif } /* @@ -691,12 +752,12 @@ static void scan_eh_tab(scan_results &results, _Unwind_Action actions, // The call sites are ordered in increasing value of start uintptr_t start = readEncodedPointer(&callSitePtr, callSiteEncoding); uintptr_t length = readEncodedPointer(&callSitePtr, callSiteEncoding); - uintptr_t landingPad = readEncodedPointer(&callSitePtr, callSiteEncoding); + landing_pad_t landingPad = readEncodedPointer(&callSitePtr, callSiteEncoding); uintptr_t actionEntry = readULEB128(&callSitePtr); if ((start <= ipOffset) && (ipOffset < (start + length))) #else // __USING_SJLJ_EXCEPTIONS__ || __WASM_EXCEPTIONS__ // ip is 1-based index into this table - uintptr_t landingPad = readULEB128(&callSitePtr); + landing_pad_t landingPad = readULEB128(&callSitePtr); uintptr_t actionEntry = readULEB128(&callSitePtr); if (--ip == 0) #endif // __USING_SJLJ_EXCEPTIONS__ || __WASM_EXCEPTIONS__ @@ -903,6 +964,57 @@ _UA_CLEANUP_PHASE */ #if !defined(_LIBCXXABI_ARM_EHABI) + +// We use these helper functions to work around the behavior of casting between +// integers (even those that are authenticated) and authenticated pointers. +// Because the schemas being used are address discriminated we cannot use a +// trivial value union to coerce the types so instead we perform the re-signing +// manually. +using __cxa_catch_temp_type = decltype(__cxa_exception::catchTemp); +static inline void set_landing_pad(scan_results& results, + const __cxa_catch_temp_type& source) { +#if __has_feature(ptrauth_calls) + const uintptr_t sourceDiscriminator = + ptrauth_blend_discriminator(&source, __ptrauth_cxxabi_catch_temp_disc); + const uintptr_t targetDiscriminator = + ptrauth_blend_discriminator(&results.landingPad, + __ptrauth_scan_results_landingpad_disc); + uintptr_t reauthenticatedLandingPad = + (uintptr_t)ptrauth_auth_and_resign(*reinterpret_cast<void* const*>(&source), + __ptrauth_cxxabi_catch_temp_key, + sourceDiscriminator, + __ptrauth_scan_results_landingpad_key, + targetDiscriminator); + memmove(reinterpret_cast<void *>(&results.landingPad), + reinterpret_cast<void *>(&reauthenticatedLandingPad), + sizeof(reauthenticatedLandingPad)); +#else + results.landingPad = reinterpret_cast<landing_pad_t>(source); +#endif +} + +static inline void get_landing_pad(__cxa_catch_temp_type &dest, + const scan_results &results) { +#if __has_feature(ptrauth_calls) + const uintptr_t sourceDiscriminator = + ptrauth_blend_discriminator(&results.landingPad, + __ptrauth_scan_results_landingpad_disc); + const uintptr_t targetDiscriminator = + ptrauth_blend_discriminator(&dest, __ptrauth_cxxabi_catch_temp_disc); + uintptr_t reauthenticatedPointer = + (uintptr_t)ptrauth_auth_and_resign(*reinterpret_cast<void* const*>(&results.landingPad), + __ptrauth_scan_results_landingpad_key, + sourceDiscriminator, + __ptrauth_cxxabi_catch_temp_key, + targetDiscriminator); + memmove(reinterpret_cast<void *>(&dest), + reinterpret_cast<void *>(&reauthenticatedPointer), + sizeof(reauthenticatedPointer)); +#else + dest = reinterpret_cast<__cxa_catch_temp_type>(results.landingPad); +#endif +} + #ifdef __WASM_EXCEPTIONS__ _Unwind_Reason_Code __gxx_personality_wasm0 #elif defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__) @@ -935,8 +1047,7 @@ __gxx_personality_v0 results.ttypeIndex = exception_header->handlerSwitchValue; results.actionRecord = exception_header->actionRecord; results.languageSpecificData = exception_header->languageSpecificData; - results.landingPad = - reinterpret_cast<uintptr_t>(exception_header->catchTemp); + set_landing_pad(results, exception_header->catchTemp); results.adjustedPtr = exception_header->adjustedPtr; // Jump to the handler. @@ -970,7 +1081,7 @@ __gxx_personality_v0 exc->handlerSwitchValue = static_cast<int>(results.ttypeIndex); exc->actionRecord = results.actionRecord; exc->languageSpecificData = results.languageSpecificData; - exc->catchTemp = reinterpret_cast<void*>(results.landingPad); + get_landing_pad(exc->catchTemp, results); exc->adjustedPtr = results.adjustedPtr; #ifdef __WASM_EXCEPTIONS__ // Wasm only uses a single phase (_UA_SEARCH_PHASE), so save the diff --git a/libunwind/include/__libunwind_config.h b/libunwind/include/__libunwind_config.h index 544b3ec..343934e 100644 --- a/libunwind/include/__libunwind_config.h +++ b/libunwind/include/__libunwind_config.h @@ -212,4 +212,11 @@ # define _LIBUNWIND_HIGHEST_DWARF_REGISTER 287 #endif // _LIBUNWIND_IS_NATIVE_ONLY +#if __has_feature(ptrauth_calls) && __has_feature(ptrauth_returns) +# define _LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING 1 +#elif __has_feature(ptrauth_calls) != __has_feature(ptrauth_returns) +# error "Either both or none of ptrauth_calls and ptrauth_returns "\ + "is allowed to be enabled" +#endif + #endif // ____LIBUNWIND_CONFIG_H__ diff --git a/libunwind/include/libunwind.h b/libunwind/include/libunwind.h index 94928f4..18684ce 100644 --- a/libunwind/include/libunwind.h +++ b/libunwind/include/libunwind.h @@ -43,6 +43,109 @@ #define LIBUNWIND_AVAIL #endif +#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING) + + #include <ptrauth.h> + + // `__ptrauth_restricted_intptr` is a feature of apple clang that predates + // support for direct application of `__ptrauth` to integer types. This + // guard is necessary to support compilation with those compiler. + #if __has_extension(ptrauth_restricted_intptr_qualifier) + #define __unwind_ptrauth_restricted_intptr(...) \ + __ptrauth_restricted_intptr(__VA_ARGS__) + #else + #define __unwind_ptrauth_restricted_intptr(...) \ + __ptrauth(__VA_ARGS__) + #endif + + // ptrauth_string_discriminator("unw_proc_info_t::handler") == 0x7405 + #define __ptrauth_unwind_upi_handler_disc 0x7405 + + #define __ptrauth_unwind_upi_handler \ + __ptrauth(ptrauth_key_function_pointer, 1, __ptrauth_unwind_upi_handler_disc) + + #define __ptrauth_unwind_upi_handler_intptr \ + __unwind_ptrauth_restricted_intptr(ptrauth_key_function_pointer, 1,\ + __ptrauth_unwind_upi_handler_disc) + + // ptrauth_string_discriminator("unw_proc_info_t::start_ip") == 0xCA2C + #define __ptrauth_unwind_upi_startip \ + __unwind_ptrauth_restricted_intptr(ptrauth_key_process_independent_code, 1, 0xCA2C) + + // ptrauth_string_discriminator("unw_proc_info_t::end_ip") == 0xE183 + #define __ptrauth_unwind_upi_endip \ + __unwind_ptrauth_restricted_intptr(ptrauth_key_process_independent_code, 1, 0xE183) + + // ptrauth_string_discriminator("unw_proc_info_t::lsda") == 0x83DE + #define __ptrauth_unwind_upi_lsda \ + __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_data, 1, 0x83DE) + + // ptrauth_string_discriminator("unw_proc_info_t::flags") == 0x79A1 + #define __ptrauth_unwind_upi_flags \ + __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_data, 1, 0x79A1) + + // ptrauth_string_discriminator("unw_proc_info_t::unwind_info") == 0xC20C + #define __ptrauth_unwind_upi_info \ + __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_data, 1, 0xC20C) + + // ptrauth_string_discriminator("unw_proc_info_t::extra") == 0x03DF + #define __ptrauth_unwind_upi_extra \ + __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_data, 1, 0x03DF) + + // ptrauth_string_discriminator("Registers_arm64::link_reg_t") == 0x8301 + #define __ptrauth_unwind_registers_arm64_link_reg \ + __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_code, 1, 0x8301) + + // ptrauth_string_discriminator("UnwindInfoSections::dso_base") == 0x4FF5 + #define __ptrauth_unwind_uis_dso_base \ + __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_data, 1, 0x4FF5) + + // ptrauth_string_discriminator("UnwindInfoSections::dwarf_section") == 0x4974 + #define __ptrauth_unwind_uis_dwarf_section \ + __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_data, 1, 0x4974) + + // ptrauth_string_discriminator("UnwindInfoSections::dwarf_section_length") == 0x2A9A + #define __ptrauth_unwind_uis_dwarf_section_length \ + __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_data, 1, 0x2A9A) + + // ptrauth_string_discriminator("UnwindInfoSections::compact_unwind_section") == 0xA27B + #define __ptrauth_unwind_uis_compact_unwind_section \ + __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_data, 1, 0xA27B) + + // ptrauth_string_discriminator("UnwindInfoSections::compact_unwind_section_length") == 0x5D0A + #define __ptrauth_unwind_uis_compact_unwind_section_length \ + __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_data, 1, 0x5D0A) + + // ptrauth_string_discriminator("CIE_Info::personality") == 0x6A40 + #define __ptrauth_unwind_cie_info_personality_disc 0x6A40 + #define __ptrauth_unwind_cie_info_personality \ + __unwind_ptrauth_restricted_intptr(ptrauth_key_function_pointer, 1, \ + __ptrauth_unwind_cie_info_personality_disc) + + // ptrauth_string_discriminator("personality") == 0x7EAD) + #define __ptrauth_unwind_pauthtest_personality_disc 0x7EAD + +#else + + #define __unwind_ptrauth_restricted_intptr(...) + #define __ptrauth_unwind_upi_handler + #define __ptrauth_unwind_upi_handler_intptr + #define __ptrauth_unwind_upi_startip + #define __ptrauth_unwind_upi_endip + #define __ptrauth_unwind_upi_lsda + #define __ptrauth_unwind_upi_flags + #define __ptrauth_unwind_upi_info + #define __ptrauth_unwind_upi_extra + #define __ptrauth_unwind_registers_arm64_link_reg + #define __ptrauth_unwind_uis_dso_base + #define __ptrauth_unwind_uis_dwarf_section + #define __ptrauth_unwind_uis_dwarf_section_length + #define __ptrauth_unwind_uis_compact_unwind_section + #define __ptrauth_unwind_uis_compact_unwind_section_length + #define __ptrauth_unwind_cie_info_personality + +#endif + #if defined(_WIN32) && defined(__SEH__) #define LIBUNWIND_CURSOR_ALIGNMENT_ATTR __attribute__((__aligned__(16))) #else @@ -88,17 +191,18 @@ typedef double unw_fpreg_t; #endif struct unw_proc_info_t { - unw_word_t start_ip; /* start address of function */ - unw_word_t end_ip; /* address after end of function */ - unw_word_t lsda; /* address of language specific data area, */ - /* or zero if not used */ - unw_word_t handler; /* personality routine, or zero if not used */ - unw_word_t gp; /* not used */ - unw_word_t flags; /* not used */ - uint32_t format; /* compact unwind encoding, or zero if none */ - uint32_t unwind_info_size; /* size of DWARF unwind info, or zero if none */ - unw_word_t unwind_info; /* address of DWARF unwind info, or zero */ - unw_word_t extra; /* mach_header of mach-o image containing func */ + unw_word_t __ptrauth_unwind_upi_startip start_ip; /* start address of function */ + unw_word_t __ptrauth_unwind_upi_endip end_ip; /* address after end of function */ + unw_word_t __ptrauth_unwind_upi_lsda lsda; /* address of language specific data area, */ + /* or zero if not used */ + + unw_word_t __ptrauth_unwind_upi_handler_intptr handler; + unw_word_t gp; /* not used */ + unw_word_t __ptrauth_unwind_upi_flags flags; /* not used */ + uint32_t format; /* compact unwind encoding, or zero if none */ + uint32_t unwind_info_size; /* size of DWARF unwind info, or zero if none */ + unw_word_t __ptrauth_unwind_upi_info unwind_info; /* address of DWARF unwind info, or zero */ + unw_word_t __ptrauth_unwind_upi_extra extra; /* mach_header of mach-o image containing func */ }; typedef struct unw_proc_info_t unw_proc_info_t; diff --git a/libunwind/src/AddressSpace.hpp b/libunwind/src/AddressSpace.hpp index 5551c7d..63f9cb3 100644 --- a/libunwind/src/AddressSpace.hpp +++ b/libunwind/src/AddressSpace.hpp @@ -129,22 +129,27 @@ struct UnwindInfoSections { defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) || \ defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) // No dso_base for SEH. - uintptr_t dso_base; + uintptr_t __ptrauth_unwind_uis_dso_base + dso_base = 0; #endif #if defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) size_t text_segment_length; #endif #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) - uintptr_t dwarf_section; - size_t dwarf_section_length; + uintptr_t __ptrauth_unwind_uis_dwarf_section + dwarf_section = 0; + size_t __ptrauth_unwind_uis_dwarf_section_length + dwarf_section_length = 0; #endif #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) uintptr_t dwarf_index_section; size_t dwarf_index_section_length; #endif #if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) - uintptr_t compact_unwind_section; - size_t compact_unwind_section_length; + uintptr_t __ptrauth_unwind_uis_compact_unwind_section + compact_unwind_section = 0; + size_t __ptrauth_unwind_uis_compact_unwind_section_length + compact_unwind_section_length = 0; #endif #if defined(_LIBUNWIND_ARM_EHABI) uintptr_t arm_section; @@ -196,7 +201,7 @@ public: static int64_t getSLEB128(pint_t &addr, pint_t end); pint_t getEncodedP(pint_t &addr, pint_t end, uint8_t encoding, - pint_t datarelBase = 0); + pint_t datarelBase = 0, pint_t *resultAddr = nullptr); bool findFunctionName(pint_t addr, char *buf, size_t bufLen, unw_word_t *offset); bool findUnwindSections(pint_t targetAddr, UnwindInfoSections &info); @@ -269,7 +274,7 @@ inline int64_t LocalAddressSpace::getSLEB128(pint_t &addr, pint_t end) { inline LocalAddressSpace::pint_t LocalAddressSpace::getEncodedP(pint_t &addr, pint_t end, uint8_t encoding, - pint_t datarelBase) { + pint_t datarelBase, pint_t *resultAddr) { pint_t startAddr = addr; const uint8_t *p = (uint8_t *)addr; pint_t result; @@ -353,8 +358,14 @@ LocalAddressSpace::getEncodedP(pint_t &addr, pint_t end, uint8_t encoding, break; } - if (encoding & DW_EH_PE_indirect) + if (encoding & DW_EH_PE_indirect) { + if (resultAddr) + *resultAddr = result; result = getP(result); + } else { + if (resultAddr) + *resultAddr = startAddr; + } return result; } diff --git a/libunwind/src/CompactUnwinder.hpp b/libunwind/src/CompactUnwinder.hpp index a7a8a15..cd2e0e3 100644 --- a/libunwind/src/CompactUnwinder.hpp +++ b/libunwind/src/CompactUnwinder.hpp @@ -601,11 +601,17 @@ int CompactUnwinder_arm64<A>::stepWithCompactEncodingFrameless( savedRegisterLoc -= 8; } + // We load the link register prior to setting the new SP as the authentication + // schema for LR entangles the SP of the old frame into the diversifier. + Registers_arm64::reg_t linkRegister = registers.getRegister(UNW_AARCH64_LR); + // subtract stack size off of sp registers.setSP(savedRegisterLoc); - // set pc to be value in lr - registers.setIP(registers.getRegister(UNW_AARCH64_LR)); + // Set pc to be value in lr. This needs to be performed after the new SP has + // been set, as the PC authentication schema entangles the SP of the new + // frame. + registers.setIP(linkRegister); return UNW_STEP_SUCCESS; } @@ -614,7 +620,7 @@ template <typename A> int CompactUnwinder_arm64<A>::stepWithCompactEncodingFrame( compact_unwind_encoding_t encoding, uint64_t, A &addressSpace, Registers_arm64 ®isters) { - uint64_t savedRegisterLoc = registers.getFP() - 8; + Registers_arm64::reg_t savedRegisterLoc = registers.getFP() - 8; if (encoding & UNWIND_ARM64_FRAME_X19_X20_PAIR) { registers.setRegister(UNW_AARCH64_X19, addressSpace.get64(savedRegisterLoc)); @@ -680,11 +686,16 @@ int CompactUnwinder_arm64<A>::stepWithCompactEncodingFrame( savedRegisterLoc -= 8; } - uint64_t fp = registers.getFP(); + Registers_arm64::reg_t fp = registers.getFP(); + // fp points to old fp registers.setFP(addressSpace.get64(fp)); - // old sp is fp less saved fp and lr + + // Old sp is fp less saved fp and lr. We need to set this prior to setting + // the lr as the pointer authentication schema for the lr incorporates the + // sp as part of the diversifier. registers.setSP(fp + 16); + // pop return address into pc registers.setIP(addressSpace.get64(fp + 8)); diff --git a/libunwind/src/DwarfInstructions.hpp b/libunwind/src/DwarfInstructions.hpp index e7be0d6..d2822e8 100644 --- a/libunwind/src/DwarfInstructions.hpp +++ b/libunwind/src/DwarfInstructions.hpp @@ -22,7 +22,6 @@ #include "dwarf2.h" #include "libunwind_ext.h" - namespace libunwind { @@ -34,8 +33,9 @@ public: typedef typename A::pint_t pint_t; typedef typename A::sint_t sint_t; - static int stepWithDwarf(A &addressSpace, pint_t pc, pint_t fdeStart, - R ®isters, bool &isSignalFrame, bool stage2); + static int stepWithDwarf(A &addressSpace, const typename R::link_reg_t &pc, + pint_t fdeStart, R ®isters, bool &isSignalFrame, + bool stage2); private: @@ -64,9 +64,10 @@ private: static pint_t getCFA(A &addressSpace, const PrologInfo &prolog, const R ®isters) { - if (prolog.cfaRegister != 0) - return (pint_t)((sint_t)registers.getRegister((int)prolog.cfaRegister) + - prolog.cfaRegisterOffset); + if (prolog.cfaRegister != 0) { + uintptr_t cfaRegister = registers.getRegister((int)prolog.cfaRegister); + return (pint_t)(cfaRegister + prolog.cfaRegisterOffset); + } if (prolog.cfaExpression != 0) return evaluateExpression((pint_t)prolog.cfaExpression, addressSpace, registers, 0); @@ -207,7 +208,8 @@ bool DwarfInstructions<A, R>::isReturnAddressSignedWithPC(A &addressSpace, #endif template <typename A, typename R> -int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc, +int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, + const typename R::link_reg_t &pc, pint_t fdeStart, R ®isters, bool &isSignalFrame, bool stage2) { FDE_Info fdeInfo; @@ -264,7 +266,7 @@ int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc, // by a CFI directive later on. newRegisters.setSP(cfa); - pint_t returnAddress = 0; + typename R::reg_t returnAddress = 0; constexpr int lastReg = R::lastDwarfRegNum(); static_assert(static_cast<int>(CFI_Parser<A>::kMaxRegisterNumber) >= lastReg, @@ -300,7 +302,16 @@ int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc, isSignalFrame = cieInfo.isSignalFrame; -#if defined(_LIBUNWIND_TARGET_AARCH64) +#if defined(_LIBUNWIND_TARGET_AARCH64) && \ + !defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING) + // There are two ways of return address signing: pac-ret (enabled via + // -mbranch-protection=pac-ret) and ptrauth-returns (enabled as part of + // Apple's arm64e or experimental pauthtest ABI on Linux). The code + // below handles signed RA for pac-ret, while ptrauth-returns uses + // different logic. + // TODO: unify logic for both cases, see + // https://github.com/llvm/llvm-project/issues/160110 + // // If the target is aarch64 then the return address may have been signed // using the v8.3 pointer authentication extensions. The original // return address needs to be authenticated before the return address is diff --git a/libunwind/src/DwarfParser.hpp b/libunwind/src/DwarfParser.hpp index 25250e0..dbd7d65 100644 --- a/libunwind/src/DwarfParser.hpp +++ b/libunwind/src/DwarfParser.hpp @@ -23,6 +23,10 @@ #include "config.h" +#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING) +#include <ptrauth.h> +#endif + namespace libunwind { /// CFI_Parser does basic parsing of a CFI (Call Frame Information) records. @@ -33,6 +37,7 @@ template <typename A> class CFI_Parser { public: typedef typename A::pint_t pint_t; + typedef pint_t __ptrauth_unwind_cie_info_personality personality_t; /// Information encoded in a CIE (Common Information Entry) struct CIE_Info { @@ -43,7 +48,7 @@ public: uint8_t lsdaEncoding; uint8_t personalityEncoding; uint8_t personalityOffsetInCIE; - pint_t personality; + personality_t personality; uint32_t codeAlignFactor; int dataAlignFactor; bool isSignalFrame; @@ -369,6 +374,7 @@ const char *CFI_Parser<A>::parseCIE(A &addressSpace, pint_t cie, cieInfo->returnAddressRegister = (uint8_t)raReg; // parse augmentation data based on augmentation string const char *result = NULL; + pint_t resultAddr = 0; if (addressSpace.get8(strStart) == 'z') { // parse augmentation data length addressSpace.getULEB128(p, cieContentEnd); @@ -377,13 +383,41 @@ const char *CFI_Parser<A>::parseCIE(A &addressSpace, pint_t cie, case 'z': cieInfo->fdesHaveAugmentationData = true; break; - case 'P': + case 'P': { cieInfo->personalityEncoding = addressSpace.get8(p); ++p; cieInfo->personalityOffsetInCIE = (uint8_t)(p - cie); - cieInfo->personality = addressSpace - .getEncodedP(p, cieContentEnd, cieInfo->personalityEncoding); + pint_t personality = addressSpace.getEncodedP( + p, cieContentEnd, cieInfo->personalityEncoding, + /*datarelBase=*/0, &resultAddr); +#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING) + if (personality) { + // The GOT for the personality function was signed address + // authenticated. Manually re-sign with the CIE_Info::personality + // schema. If we could guarantee the encoding of the personality we + // could avoid this by simply giving resultAddr the correct ptrauth + // schema and performing an assignment. +#if defined(__arm64e__) + const auto oldDiscriminator = resultAddr; +#else + const auto oldDiscriminator = ptrauth_blend_discriminator( + (void *)resultAddr, __ptrauth_unwind_pauthtest_personality_disc); +#endif + const auto discriminator = ptrauth_blend_discriminator( + &cieInfo->personality, + __ptrauth_unwind_cie_info_personality_disc); + void *signedPtr = ptrauth_auth_and_resign( + (void *)personality, ptrauth_key_function_pointer, + oldDiscriminator, ptrauth_key_function_pointer, discriminator); + personality = (pint_t)signedPtr; + } +#endif + // We use memmove to set the CIE personality as we have already + // re-signed the pointer to the correct schema. + memmove((void *)&cieInfo->personality, (void *)&personality, + sizeof(personality)); break; + } case 'L': cieInfo->lsdaEncoding = addressSpace.get8(p); ++p; diff --git a/libunwind/src/Registers.hpp b/libunwind/src/Registers.hpp index 8b3055f..5a5b578 100644 --- a/libunwind/src/Registers.hpp +++ b/libunwind/src/Registers.hpp @@ -17,6 +17,7 @@ #include "config.h" #include "libunwind.h" +#include "libunwind_ext.h" #include "shadow_stack_unwind.h" namespace libunwind { @@ -60,6 +61,9 @@ public: Registers_x86(); Registers_x86(const void *registers); + typedef uint32_t reg_t; + typedef uint32_t link_reg_t; + bool validRegister(int num) const; uint32_t getRegister(int num) const; void setRegister(int num, uint32_t value); @@ -278,6 +282,9 @@ public: Registers_x86_64(); Registers_x86_64(const void *registers); + typedef uint64_t reg_t; + typedef uint64_t link_reg_t; + bool validRegister(int num) const; uint64_t getRegister(int num) const; void setRegister(int num, uint64_t value); @@ -597,6 +604,9 @@ public: Registers_ppc(); Registers_ppc(const void *registers); + typedef uint32_t reg_t; + typedef uint32_t link_reg_t; + bool validRegister(int num) const; uint32_t getRegister(int num) const; void setRegister(int num, uint32_t value); @@ -1169,6 +1179,9 @@ public: Registers_ppc64(); Registers_ppc64(const void *registers); + typedef uint64_t reg_t; + typedef uint64_t link_reg_t; + bool validRegister(int num) const; uint64_t getRegister(int num) const; void setRegister(int num, uint64_t value); @@ -1826,6 +1839,11 @@ class _LIBUNWIND_HIDDEN Registers_arm64 { public: Registers_arm64(); Registers_arm64(const void *registers); + Registers_arm64(const Registers_arm64 &); + Registers_arm64 &operator=(const Registers_arm64 &); + + typedef uint64_t reg_t; + typedef uint64_t __ptrauth_unwind_registers_arm64_link_reg link_reg_t; bool validRegister(int num) const; uint64_t getRegister(int num) const; @@ -1845,10 +1863,47 @@ public: uint64_t getSP() const { return _registers.__sp; } void setSP(uint64_t value) { _registers.__sp = value; } - uint64_t getIP() const { return _registers.__pc; } - void setIP(uint64_t value) { _registers.__pc = value; } - uint64_t getFP() const { return _registers.__fp; } - void setFP(uint64_t value) { _registers.__fp = value; } + uint64_t getIP() const { + uint64_t value = _registers.__pc; +#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING) + // Note the value of the PC was signed to its address in the register state + // but everyone else expects it to be sign by the SP, so convert on return. + value = (uint64_t)ptrauth_auth_and_resign((void *)_registers.__pc, + ptrauth_key_return_address, + &_registers.__pc, + ptrauth_key_return_address, + getSP()); +#endif + return value; + } + void setIP(uint64_t value) { +#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING) + // Note the value which was set should have been signed with the SP. + // We then resign with the slot we are being stored in to so that both SP + // and LR can't be spoofed at the same time. + value = (uint64_t)ptrauth_auth_and_resign((void *)value, + ptrauth_key_return_address, + getSP(), + ptrauth_key_return_address, + &_registers.__pc); +#endif + _registers.__pc = value; + } + uint64_t getFP() const { return _registers.__fp; } + void setFP(uint64_t value) { _registers.__fp = value; } + +#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING) + void + loadAndAuthenticateLinkRegister(reg_t inplaceAuthedLinkRegister, + link_reg_t *referenceAuthedLinkRegister) { + // If we are in an arm64/arm64e frame, then the PC should have been signed + // with the SP + *referenceAuthedLinkRegister = + (uint64_t)ptrauth_auth_data((void *)inplaceAuthedLinkRegister, + ptrauth_key_return_address, + _registers.__sp); + } +#endif private: uint64_t lazyGetVG() const; @@ -1889,11 +1944,35 @@ inline Registers_arm64::Registers_arm64(const void *registers) { memcpy(_vectorHalfRegisters, static_cast<const uint8_t *>(registers) + sizeof(GPRs), sizeof(_vectorHalfRegisters)); + _misc_registers.__vg = 0; + +#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING) + // We have to do some pointer authentication fixups after this copy, + // and as part of that we need to load the source pc without + // authenticating so that we maintain the signature for the resigning + // performed by setIP. + uint64_t pcRegister = 0; + memmove(&pcRegister, ((uint8_t *)&_registers) + offsetof(GPRs, __pc), + sizeof(pcRegister)); + setIP(pcRegister); +#endif +} + +inline Registers_arm64::Registers_arm64(const Registers_arm64 &other) { + *this = other; +} + +inline Registers_arm64 & +Registers_arm64::operator=(const Registers_arm64 &other) { + memmove(static_cast<void *>(this), &other, sizeof(*this)); + // We perform this step to ensure that we correctly authenticate and re-sign + // the pc after the bitwise copy. + setIP(other.getIP()); + return *this; } inline Registers_arm64::Registers_arm64() { - memset(&_registers, 0, sizeof(_registers)); - memset(&_vectorHalfRegisters, 0, sizeof(_vectorHalfRegisters)); + memset(static_cast<void *>(this), 0, sizeof(*this)); } inline bool Registers_arm64::validRegister(int regNum) const { @@ -1930,13 +2009,13 @@ inline uint64_t Registers_arm64::lazyGetVG() const { inline uint64_t Registers_arm64::getRegister(int regNum) const { if (regNum == UNW_REG_IP || regNum == UNW_AARCH64_PC) - return _registers.__pc; + return getIP(); if (regNum == UNW_REG_SP || regNum == UNW_AARCH64_SP) return _registers.__sp; if (regNum == UNW_AARCH64_RA_SIGN_STATE) return _registers.__ra_sign_state; if (regNum == UNW_AARCH64_FP) - return _registers.__fp; + return getFP(); if (regNum == UNW_AARCH64_LR) return _registers.__lr; if (regNum == UNW_AARCH64_VG) @@ -1948,13 +2027,13 @@ inline uint64_t Registers_arm64::getRegister(int regNum) const { inline void Registers_arm64::setRegister(int regNum, uint64_t value) { if (regNum == UNW_REG_IP || regNum == UNW_AARCH64_PC) - _registers.__pc = value; + setIP(value); else if (regNum == UNW_REG_SP || regNum == UNW_AARCH64_SP) _registers.__sp = value; else if (regNum == UNW_AARCH64_RA_SIGN_STATE) _registers.__ra_sign_state = value; else if (regNum == UNW_AARCH64_FP) - _registers.__fp = value; + setFP(value); else if (regNum == UNW_AARCH64_LR) _registers.__lr = value; else if (regNum == UNW_AARCH64_VG) @@ -2148,6 +2227,9 @@ public: Registers_arm(); Registers_arm(const void *registers); + typedef uint32_t reg_t; + typedef uint32_t link_reg_t; + bool validRegister(int num) const; uint32_t getRegister(int num) const; void setRegister(int num, uint32_t value); @@ -2653,6 +2735,9 @@ public: Registers_or1k(); Registers_or1k(const void *registers); + typedef uint32_t reg_t; + typedef uint32_t link_reg_t; + bool validRegister(int num) const; uint32_t getRegister(int num) const; void setRegister(int num, uint32_t value); @@ -2852,6 +2937,9 @@ public: Registers_mips_o32(); Registers_mips_o32(const void *registers); + typedef uint32_t reg_t; + typedef uint32_t link_reg_t; + bool validRegister(int num) const; uint32_t getRegister(int num) const; void setRegister(int num, uint32_t value); @@ -3187,6 +3275,9 @@ public: Registers_mips_newabi(); Registers_mips_newabi(const void *registers); + typedef uint64_t reg_t; + typedef uint64_t link_reg_t; + bool validRegister(int num) const; uint64_t getRegister(int num) const; void setRegister(int num, uint64_t value); @@ -3490,6 +3581,9 @@ public: Registers_sparc(); Registers_sparc(const void *registers); + typedef uint32_t reg_t; + typedef uint32_t link_reg_t; + bool validRegister(int num) const; uint32_t getRegister(int num) const; void setRegister(int num, uint32_t value); @@ -3676,6 +3770,9 @@ public: Registers_sparc64() = default; Registers_sparc64(const void *registers); + typedef uint64_t reg_t; + typedef uint64_t link_reg_t; + bool validRegister(int num) const; uint64_t getRegister(int num) const; void setRegister(int num, uint64_t value); @@ -3861,6 +3958,9 @@ public: Registers_hexagon(); Registers_hexagon(const void *registers); + typedef uint32_t reg_t; + typedef uint32_t link_reg_t; + bool validRegister(int num) const; uint32_t getRegister(int num) const; void setRegister(int num, uint32_t value); @@ -4076,6 +4176,9 @@ public: Registers_riscv(); Registers_riscv(const void *registers); + typedef ::libunwind::reg_t reg_t; + typedef ::libunwind::reg_t link_reg_t; + bool validRegister(int num) const; reg_t getRegister(int num) const; void setRegister(int num, reg_t value); @@ -4373,6 +4476,9 @@ public: Registers_ve(); Registers_ve(const void *registers); + typedef uint64_t reg_t; + typedef uint64_t link_reg_t; + bool validRegister(int num) const; uint64_t getRegister(int num) const; void setRegister(int num, uint64_t value); @@ -4816,6 +4922,9 @@ public: Registers_s390x(); Registers_s390x(const void *registers); + typedef uint64_t reg_t; + typedef uint64_t link_reg_t; + bool validRegister(int num) const; uint64_t getRegister(int num) const; void setRegister(int num, uint64_t value); @@ -5104,6 +5213,9 @@ public: Registers_loongarch(); Registers_loongarch(const void *registers); + typedef uint64_t reg_t; + typedef uint64_t link_reg_t; + bool validRegister(int num) const; uint64_t getRegister(int num) const; void setRegister(int num, uint64_t value); diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp index 9a1afd3..7ec5f9e 100644 --- a/libunwind/src/UnwindCursor.hpp +++ b/libunwind/src/UnwindCursor.hpp @@ -1047,18 +1047,26 @@ private: bool getInfoFromFdeCie(const typename CFI_Parser<A>::FDE_Info &fdeInfo, const typename CFI_Parser<A>::CIE_Info &cieInfo, pint_t pc, uintptr_t dso_base); - bool getInfoFromDwarfSection(pint_t pc, const UnwindInfoSections §s, - uint32_t fdeSectionOffsetHint=0); + bool getInfoFromDwarfSection(const typename R::link_reg_t &pc, + const UnwindInfoSections §s, + uint32_t fdeSectionOffsetHint = 0); int stepWithDwarfFDE(bool stage2) { +#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING) + typename R::reg_t rawPC = this->getReg(UNW_REG_IP); + typename R::link_reg_t pc; + _registers.loadAndAuthenticateLinkRegister(rawPC, &pc); +#else + typename R::link_reg_t pc = this->getReg(UNW_REG_IP); +#endif return DwarfInstructions<A, R>::stepWithDwarf( - _addressSpace, (pint_t)this->getReg(UNW_REG_IP), - (pint_t)_info.unwind_info, _registers, _isSignalFrame, stage2); + _addressSpace, pc, (pint_t)_info.unwind_info, _registers, + _isSignalFrame, stage2); } #endif #if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) - bool getInfoFromCompactEncodingSection(pint_t pc, - const UnwindInfoSections §s); + bool getInfoFromCompactEncodingSection(const typename R::link_reg_t &pc, + const UnwindInfoSections §s); int stepWithCompactEncoding(bool stage2 = false) { #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) if ( compactSaysUseDwarf() ) @@ -1359,13 +1367,13 @@ UnwindCursor<A, R>::UnwindCursor(unw_context_t *context, A &as) "UnwindCursor<> does not fit in unw_cursor_t"); static_assert((alignof(UnwindCursor<A, R>) <= alignof(unw_cursor_t)), "UnwindCursor<> requires more alignment than unw_cursor_t"); - memset(&_info, 0, sizeof(_info)); + memset(static_cast<void *>(&_info), 0, sizeof(_info)); } template <typename A, typename R> UnwindCursor<A, R>::UnwindCursor(A &as, void *) : _addressSpace(as), _unwindInfoMissing(false), _isSignalFrame(false) { - memset(&_info, 0, sizeof(_info)); + memset(static_cast<void *>(&_info), 0, sizeof(_info)); // FIXME // fill in _registers from thread arg } @@ -1683,9 +1691,9 @@ bool UnwindCursor<A, R>::getInfoFromFdeCie( } template <typename A, typename R> -bool UnwindCursor<A, R>::getInfoFromDwarfSection(pint_t pc, - const UnwindInfoSections §s, - uint32_t fdeSectionOffsetHint) { +bool UnwindCursor<A, R>::getInfoFromDwarfSection( + const typename R::link_reg_t &pc, const UnwindInfoSections §s, + uint32_t fdeSectionOffsetHint) { typename CFI_Parser<A>::FDE_Info fdeInfo; typename CFI_Parser<A>::CIE_Info cieInfo; bool foundFDE = false; @@ -1743,8 +1751,8 @@ bool UnwindCursor<A, R>::getInfoFromDwarfSection(pint_t pc, #if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) template <typename A, typename R> -bool UnwindCursor<A, R>::getInfoFromCompactEncodingSection(pint_t pc, - const UnwindInfoSections §s) { +bool UnwindCursor<A, R>::getInfoFromCompactEncodingSection( + const typename R::link_reg_t &pc, const UnwindInfoSections §s) { const bool log = false; if (log) fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX, mh=0x%llX)\n", @@ -1975,6 +1983,16 @@ bool UnwindCursor<A, R>::getInfoFromCompactEncodingSection(pint_t pc, personalityIndex * sizeof(uint32_t)); pint_t personalityPointer = sects.dso_base + (pint_t)personalityDelta; personality = _addressSpace.getP(personalityPointer); +#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING) + // The GOT for the personality function was signed address authenticated. + // Resign it as a regular function pointer. + const auto discriminator = ptrauth_blend_discriminator( + &_info.handler, __ptrauth_unwind_upi_handler_disc); + void *signedPtr = ptrauth_auth_and_resign( + (void *)personality, ptrauth_key_function_pointer, personalityPointer, + ptrauth_key_function_pointer, discriminator); + personality = (__typeof(personality))signedPtr; +#endif if (log) fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX), " "personalityDelta=0x%08X, personality=0x%08llX\n", @@ -1988,7 +2006,11 @@ bool UnwindCursor<A, R>::getInfoFromCompactEncodingSection(pint_t pc, _info.start_ip = funcStart; _info.end_ip = funcEnd; _info.lsda = lsda; - _info.handler = personality; + // We use memmove to copy the personality function as we have already manually + // re-signed the pointer, and assigning directly will attempt to incorrectly + // sign the already signed value. + memmove(reinterpret_cast<void *>(&_info.handler), + reinterpret_cast<void *>(&personality), sizeof(personality)); _info.gp = 0; _info.flags = 0; _info.format = encoding; @@ -2641,11 +2663,19 @@ void UnwindCursor<A, R>::setInfoBasedOnIPRegister(bool isReturnAddress) { _isSigReturn = false; #endif - pint_t pc = static_cast<pint_t>(this->getReg(UNW_REG_IP)); + typename R::reg_t rawPC = this->getReg(UNW_REG_IP); + #if defined(_LIBUNWIND_ARM_EHABI) // Remove the thumb bit so the IP represents the actual instruction address. // This matches the behaviour of _Unwind_GetIP on arm. - pc &= (pint_t)~0x1; + rawPC &= (pint_t)~0x1; +#endif + + typename R::link_reg_t pc; +#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING) + _registers.loadAndAuthenticateLinkRegister(rawPC, &pc); +#else + pc = rawPC; #endif // Exit early if at the top of the stack. @@ -3189,16 +3219,22 @@ template <typename A, typename R> int UnwindCursor<A, R>::step(bool stage2) { template <typename A, typename R> void UnwindCursor<A, R>::getInfo(unw_proc_info_t *info) { if (_unwindInfoMissing) - memset(info, 0, sizeof(*info)); + memset(static_cast<void *>(info), 0, sizeof(*info)); else *info = _info; } template <typename A, typename R> bool UnwindCursor<A, R>::getFunctionName(char *buf, size_t bufLen, - unw_word_t *offset) { - return _addressSpace.findFunctionName((pint_t)this->getReg(UNW_REG_IP), - buf, bufLen, offset); + unw_word_t *offset) { +#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING) + typename R::reg_t rawPC = this->getReg(UNW_REG_IP); + typename R::link_reg_t pc; + _registers.loadAndAuthenticateLinkRegister(rawPC, &pc); +#else + typename R::link_reg_t pc = this->getReg(UNW_REG_IP); +#endif + return _addressSpace.findFunctionName(pc, buf, bufLen, offset); } #if defined(_LIBUNWIND_CHECK_LINUX_SIGRETURN) diff --git a/libunwind/src/UnwindLevel1.c b/libunwind/src/UnwindLevel1.c index f3b451a..b0cd60d 100644 --- a/libunwind/src/UnwindLevel1.c +++ b/libunwind/src/UnwindLevel1.c @@ -90,6 +90,23 @@ } while (0) #endif +// We need this helper function as the semantics of casting between integers and +// function pointers mean that we end up with a function pointer without the +// correct signature. Instead we assign to an integer with a matching schema, +// and then memmove the result into a variable of the correct type. This memmove +// is possible as `_Unwind_Personality_Fn` is a standard function pointer, and +// as such is not address diversified. +static _Unwind_Personality_Fn get_handler_function(unw_proc_info_t *frameInfo) { + uintptr_t __unwind_ptrauth_restricted_intptr(ptrauth_key_function_pointer, + 0, + ptrauth_function_pointer_type_discriminator(_Unwind_Personality_Fn)) + reauthenticatedIntegerHandler = frameInfo->handler; + _Unwind_Personality_Fn handler; + memmove(&handler, (void *)&reauthenticatedIntegerHandler, + sizeof(_Unwind_Personality_Fn)); + return handler; +} + static _Unwind_Reason_Code unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) { __unw_init_local(cursor, uc); @@ -147,8 +164,7 @@ unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except // If there is a personality routine, ask it if it will want to stop at // this frame. if (frameInfo.handler != 0) { - _Unwind_Personality_Fn p = - (_Unwind_Personality_Fn)(uintptr_t)(frameInfo.handler); + _Unwind_Personality_Fn p = get_handler_function(&frameInfo); _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_obj=%p): calling personality function %p", (void *)exception_object, (void *)(uintptr_t)p); @@ -276,8 +292,7 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, ++framesWalked; // If there is a personality routine, tell it we are unwinding. if (frameInfo.handler != 0) { - _Unwind_Personality_Fn p = - (_Unwind_Personality_Fn)(uintptr_t)(frameInfo.handler); + _Unwind_Personality_Fn p = get_handler_function(&frameInfo); _Unwind_Action action = _UA_CLEANUP_PHASE; if (sp == exception_object->private_2) { // Tell personality this was the frame it marked in phase 1. @@ -394,8 +409,7 @@ unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor, ++framesWalked; // If there is a personality routine, tell it we are unwinding. if (frameInfo.handler != 0) { - _Unwind_Personality_Fn p = - (_Unwind_Personality_Fn)(intptr_t)(frameInfo.handler); + _Unwind_Personality_Fn p = get_handler_function(&frameInfo); _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2_forced(ex_obj=%p): calling personality function %p", (void *)exception_object, (void *)(uintptr_t)p); @@ -597,6 +611,18 @@ _LIBUNWIND_EXPORT uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) { unw_cursor_t *cursor = (unw_cursor_t *)context; unw_word_t result; __unw_get_reg(cursor, UNW_REG_IP, &result); + +#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING) + // If we are in an arm64e frame, then the PC should have been signed with the + // sp + { + unw_word_t sp; + __unw_get_reg(cursor, UNW_REG_SP, &sp); + result = (unw_word_t)ptrauth_auth_data((void *)result, + ptrauth_key_return_address, sp); + } +#endif + _LIBUNWIND_TRACE_API("_Unwind_GetIP(context=%p) => 0x%" PRIxPTR, (void *)context, result); return (uintptr_t)result; diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S index 1077d80..5d71d2cf 100644 --- a/libunwind/src/UnwindRegistersRestore.S +++ b/libunwind/src/UnwindRegistersRestore.S @@ -659,7 +659,7 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto) ldp x24,x25, [x0, #0x0C0] ldp x26,x27, [x0, #0x0D0] ldp x28,x29, [x0, #0x0E0] - ldr x30, [x0, #0x100] // restore pc into lr + #if defined(__ARM_FP) && __ARM_FP != 0 ldp d0, d1, [x0, #0x110] ldp d2, d3, [x0, #0x120] @@ -683,7 +683,18 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto) // context struct, because it is allocated on the stack, and an exception // could clobber the de-allocated portion of the stack after sp has been // restored. - ldr x16, [x0, #0x0F8] + + ldr x16, [x0, #0x0F8] // load sp into scratch + ldr lr, [x0, #0x100] // restore pc into lr + +#if __has_feature(ptrauth_calls) + // The LR is signed with its address inside the register state. Time + // to resign to be a regular ROP protected signed pointer + add x1, x0, #0x100 + autib lr, x1 + pacib lr, x16 // signed the scratch register for sp +#endif + ldp x0, x1, [x0, #0x000] // restore x0,x1 mov sp,x16 // restore sp #if defined(__ARM_FEATURE_GCS_DEFAULT) @@ -696,7 +707,12 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto) gcspushm x30 Lnogcs: #endif + +#if __has_feature(ptrauth_calls) + retab +#else ret x30 // jump to pc +#endif #elif defined(__arm__) && !defined(__APPLE__) diff --git a/libunwind/src/UnwindRegistersSave.S b/libunwind/src/UnwindRegistersSave.S index 8bf99eb..fe3ba78 100644 --- a/libunwind/src/UnwindRegistersSave.S +++ b/libunwind/src/UnwindRegistersSave.S @@ -769,6 +769,11 @@ LnoR2Fix: // .p2align 2 DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + +#if __has_feature(ptrauth_calls) + pacibsp +#endif + stp x0, x1, [x0, #0x000] stp x2, x3, [x0, #0x010] stp x4, x5, [x0, #0x020] @@ -809,7 +814,12 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) str d31, [x0, #0x208] #endif mov x0, #0 // return UNW_ESUCCESS + +#if __has_feature(ptrauth_calls) + retab +#else ret +#endif #elif defined(__arm__) && !defined(__APPLE__) diff --git a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp index cf39ec5..951d87d 100644 --- a/libunwind/src/libunwind.cpp +++ b/libunwind/src/libunwind.cpp @@ -118,14 +118,52 @@ _LIBUNWIND_HIDDEN int __unw_set_reg(unw_cursor_t *cursor, unw_regnum_t regNum, typedef LocalAddressSpace::pint_t pint_t; AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; if (co->validReg(regNum)) { - co->setReg(regNum, (pint_t)value); // special case altering IP to re-find info (being called by personality // function) if (regNum == UNW_REG_IP) { unw_proc_info_t info; // First, get the FDE for the old location and then update it. co->getInfo(&info); - co->setInfoBasedOnIPRegister(false); + + pint_t sp = (pint_t)co->getReg(UNW_REG_SP); + +#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING) + { + // It is only valid to set the IP within the current function. This is + // important for ptrauth, otherwise the IP cannot be correctly signed. + // The current signature of `value` is via the schema: + // __ptrauth(ptrauth_key_return_address, <<sp>>, 0) + // For this to be generally usable we manually re-sign it to the + // directly supported schema: + // __ptrauth(ptrauth_key_return_address, 1, 0) + unw_word_t + __unwind_ptrauth_restricted_intptr(ptrauth_key_return_address, 1, + 0) authenticated_value; + unw_word_t opaque_value = (uint64_t)ptrauth_auth_and_resign( + (void *)value, ptrauth_key_return_address, sp, + ptrauth_key_return_address, &authenticated_value); + memmove(reinterpret_cast<void *>(&authenticated_value), + reinterpret_cast<void *>(&opaque_value), + sizeof(authenticated_value)); + if (authenticated_value < info.start_ip || + authenticated_value > info.end_ip) + _LIBUNWIND_ABORT("PC vs frame info mismatch"); + + // PC should have been signed with the sp, so we verify that + // roundtripping does not fail. + pint_t pc = (pint_t)co->getReg(UNW_REG_IP); + if (ptrauth_auth_and_resign((void *)pc, ptrauth_key_return_address, sp, + ptrauth_key_return_address, + sp) != (void *)pc) { + _LIBUNWIND_LOG("Bad unwind through arm64e (0x%zX, 0x%zX)->0x%zX\n", + pc, sp, + (pint_t)ptrauth_auth_data( + (void *)pc, ptrauth_key_return_address, sp)); + _LIBUNWIND_ABORT("Bad unwind through arm64e"); + } + } +#endif + // If the original call expects stack adjustment, perform this now. // Normal frame unwinding would have included the offset already in the // CFA computation. @@ -133,7 +171,11 @@ _LIBUNWIND_HIDDEN int __unw_set_reg(unw_cursor_t *cursor, unw_regnum_t regNum, // this should actually be - info.gp. LLVM doesn't currently support // any such platforms and Clang doesn't export a macro for them. if (info.gp) - co->setReg(UNW_REG_SP, co->getReg(UNW_REG_SP) + info.gp); + co->setReg(UNW_REG_SP, sp + info.gp); + co->setReg(UNW_REG_IP, value); + co->setInfoBasedOnIPRegister(false); + } else { + co->setReg(regNum, (pint_t)value); } return UNW_ESUCCESS; } diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index ddfa24d..4b9c941 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -721,11 +721,11 @@ void ScriptParser::readTarget() { static int precedence(StringRef op) { return StringSwitch<int>(op) - .Cases("*", "/", "%", 11) - .Cases("+", "-", 10) - .Cases("<<", ">>", 9) - .Cases("<", "<=", ">", ">=", 8) - .Cases("==", "!=", 7) + .Cases({"*", "/", "%"}, 11) + .Cases({"+", "-"}, 10) + .Cases({"<<", ">>"}, 9) + .Cases({"<", "<=", ">", ">="}, 8) + .Cases({"==", "!="}, 7) .Case("&", 6) .Case("^", 5) .Case("|", 4) diff --git a/lldb/include/lldb/API/SBMutex.h b/lldb/include/lldb/API/SBMutex.h index 717d5f8..826ad07 100644 --- a/lldb/include/lldb/API/SBMutex.h +++ b/lldb/include/lldb/API/SBMutex.h @@ -31,6 +31,10 @@ public: /// Releases ownership of this lock. void unlock() const; + /// Tries to lock the mutex. Returns immediately. On successful lock + /// acquisition returns true, otherwise returns false. + bool try_lock() const; + private: // Private constructor used by SBTarget to create the Target API mutex. // Requires a friend declaration. diff --git a/lldb/source/API/SBMutex.cpp b/lldb/source/API/SBMutex.cpp index 445076b..c7844de 100644 --- a/lldb/source/API/SBMutex.cpp +++ b/lldb/source/API/SBMutex.cpp @@ -58,3 +58,12 @@ void SBMutex::unlock() const { if (m_opaque_sp) m_opaque_sp->unlock(); } + +bool SBMutex::try_lock() const { + LLDB_INSTRUMENT_VA(this); + + if (m_opaque_sp) + return m_opaque_sp->try_lock(); + + return false; +} diff --git a/lldb/source/Utility/RegisterValue.cpp b/lldb/source/Utility/RegisterValue.cpp index 0e99451..12c349a 100644 --- a/lldb/source/Utility/RegisterValue.cpp +++ b/lldb/source/Utility/RegisterValue.cpp @@ -199,7 +199,7 @@ Status RegisterValue::SetValueFromData(const RegisterInfo ®_info, else if (reg_info.byte_size <= 16) { uint64_t data1 = src.GetU64(&src_offset); uint64_t data2 = src.GetU64(&src_offset); - if (src.GetByteOrder() == eByteOrderBig) { + if (src.GetByteOrder() == eByteOrderLittle) { int128.x[0] = data1; int128.x[1] = data2; } else { diff --git a/lldb/tools/lldb-dap/Handler/RequestHandler.cpp b/lldb/tools/lldb-dap/Handler/RequestHandler.cpp index e7d9b89..de63c9d 100644 --- a/lldb/tools/lldb-dap/Handler/RequestHandler.cpp +++ b/lldb/tools/lldb-dap/Handler/RequestHandler.cpp @@ -181,7 +181,7 @@ void BaseRequestHandler::Run(const Request &request) { llvm::Error BaseRequestHandler::LaunchProcess( const protocol::LaunchRequestArguments &arguments) const { - auto launchCommands = arguments.launchCommands; + const std::vector<std::string> &launchCommands = arguments.launchCommands; // Instantiate a launch info instance for the target. auto launch_info = dap.target.GetLaunchInfo(); diff --git a/lldb/unittests/API/SBMutexTest.cpp b/lldb/unittests/API/SBMutexTest.cpp index aafad59..18dc420 100644 --- a/lldb/unittests/API/SBMutexTest.cpp +++ b/lldb/unittests/API/SBMutexTest.cpp @@ -36,11 +36,16 @@ TEST_F(SBMutexTest, LockTest) { std::future<void> f; { lldb::SBMutex lock = target.GetAPIMutex(); + + ASSERT_TRUE(lock.try_lock()); + lock.unlock(); + std::lock_guard<lldb::SBMutex> lock_guard(lock); ASSERT_FALSE(locked.exchange(true)); f = std::async(std::launch::async, [&]() { ASSERT_TRUE(locked); + EXPECT_FALSE(lock.try_lock()); target.BreakpointCreateByName("foo", "bar"); ASSERT_FALSE(locked); }); diff --git a/lldb/unittests/DAP/TestBase.cpp b/lldb/unittests/DAP/TestBase.cpp index 3721e09..83a3035 100644 --- a/lldb/unittests/DAP/TestBase.cpp +++ b/lldb/unittests/DAP/TestBase.cpp @@ -45,7 +45,7 @@ void TransportBase::SetUp() { /*client_name=*/"test_client", /*transport=*/*to_client, /*loop=*/loop); - auto server_handle = to_server->RegisterMessageHandler(loop, *dap.get()); + auto server_handle = to_server->RegisterMessageHandler(loop, *dap); EXPECT_THAT_EXPECTED(server_handle, Succeeded()); handles[0] = std::move(*server_handle); diff --git a/lldb/unittests/Utility/RegisterValueTest.cpp b/lldb/unittests/Utility/RegisterValueTest.cpp index e0863a4..6239dbe 100644 --- a/lldb/unittests/Utility/RegisterValueTest.cpp +++ b/lldb/unittests/Utility/RegisterValueTest.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "lldb/Utility/RegisterValue.h" +#include "lldb/Utility/DataExtractor.h" +#include "lldb/lldb-private-types.h" #include "gtest/gtest.h" #include <optional> @@ -54,3 +56,41 @@ TEST(RegisterValueTest, GetScalarValue) { Scalar((APInt(128, 0xffeeddccbbaa9988ull) << 64) | APInt(128, 0x7766554433221100))); } + +static const Scalar etalon128(APInt(128, 0xffeeddccbbaa9988ull) << 64 | + APInt(128, 0x7766554433221100ull)); + +void TestSetValueFromData128(void *src, const lldb::ByteOrder endianness) { + RegisterInfo ri{"uint128_register", + nullptr, + 16, + 0, + lldb::Encoding::eEncodingUint, + lldb::Format::eFormatDefault, + {0, 0, 0, LLDB_INVALID_REGNUM, 0}, + nullptr, + nullptr, + nullptr}; + DataExtractor src_extractor(src, 16, endianness, 8); + RegisterValue rv; + EXPECT_TRUE(rv.SetValueFromData(ri, src_extractor, 0, false).Success()); + Scalar s; + EXPECT_TRUE(rv.GetScalarValue(s)); + EXPECT_EQ(s, etalon128); +} + +// Test that the "RegisterValue::SetValueFromData" method works correctly +// with 128-bit little-endian data that represents an integer. +TEST(RegisterValueTest, SetValueFromData_128_le) { + uint8_t src[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff}; + TestSetValueFromData128(src, lldb::ByteOrder::eByteOrderLittle); +} + +// Test that the "RegisterValue::SetValueFromData" method works correctly +// with 128-bit big-endian data that represents an integer. +TEST(RegisterValueTest, SetValueFromData_128_be) { + uint8_t src[] = {0xff, 0xee, 0xdd, 0xcc, 0xbb, 0xaa, 0x99, 0x88, + 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11, 0x00}; + TestSetValueFromData128(src, lldb::ByteOrder::eByteOrderBig); +} diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index e062032..f971bc0 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -6512,6 +6512,13 @@ operations. ``buffer/global/flat_load/store/atomic`` instructions to global memory are termed vector memory operations. +``global_load_lds`` or ``buffer/global_load`` instructions with the `lds` flag +are LDS DMA loads. They interact with caches as if the loaded data were +being loaded to registers and not to LDS, and so therefore support the same +cache modifiers. They cannot be performed atomically. They implement volatile +(via aux/cpol bit 31) and nontemporal (via metadata) as if they were loads +from the global address space. + Private address space uses ``buffer_load/store`` using the scratch V# (GFX6-GFX8), or ``scratch_load/store`` (GFX9-GFX11). Since only a single thread is accessing the memory, atomic memory orderings are not meaningful, and all diff --git a/llvm/docs/CodingStandards.rst b/llvm/docs/CodingStandards.rst index 65dd794..8677d89 100644 --- a/llvm/docs/CodingStandards.rst +++ b/llvm/docs/CodingStandards.rst @@ -860,27 +860,40 @@ your private interface remains private and undisturbed by outsiders. It's okay to put extra implementation methods in a public class itself. Just make them private (or protected) and all is well. -Use Namespace Qualifiers to Implement Previously Declared Functions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Use Namespace Qualifiers to Define Previously Declared Symbols +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -When providing an out-of-line implementation of a function in a source file, do -not open namespace blocks in the source file. Instead, use namespace qualifiers -to help ensure that your definition matches an existing declaration. Do this: +When providing an out-of-line definition for various symbols (variables, +functions, opaque classes) in a source file, do not open namespace blocks in the +source file. Instead, use namespace qualifiers to help ensure that your +definition matches an existing declaration. Do this: .. code-block:: c++ // Foo.h namespace llvm { + extern int FooVal; int foo(const char *s); - } + + namespace detail { + class FooImpl; + } // namespace detail + } // namespace llvm // Foo.cpp #include "Foo.h" using namespace llvm; + + int llvm::FooVal; + int llvm::foo(const char *s) { // ... } + class detail::FooImpl { + // ... + } + Doing this helps to avoid bugs where the definition does not match the declaration from the header. For example, the following C++ code defines a new overload of ``llvm::foo`` instead of providing a definition for the existing diff --git a/llvm/docs/CommandGuide/lit.rst b/llvm/docs/CommandGuide/lit.rst index 6a721eb..70daae4 100644 --- a/llvm/docs/CommandGuide/lit.rst +++ b/llvm/docs/CommandGuide/lit.rst @@ -641,7 +641,6 @@ TestRunner.py: %{S:real} %S after expanding all symbolic links and substitute drives %{p:real} %p after expanding all symbolic links and substitute drives %{t:real} %t after expanding all symbolic links and substitute drives - %{T:real} %T after expanding all symbolic links and substitute drives %{/s:real} %/s after expanding all symbolic links and substitute drives %{/S:real} %/S after expanding all symbolic links and substitute drives %{/p:real} %/p after expanding all symbolic links and substitute drives diff --git a/llvm/include/llvm/ADT/StringSwitch.h b/llvm/include/llvm/ADT/StringSwitch.h index 26d5682..2262b11 100644 --- a/llvm/include/llvm/ADT/StringSwitch.h +++ b/llvm/include/llvm/ADT/StringSwitch.h @@ -98,11 +98,13 @@ public: return CasesImpl({S0, S1, S2}, Value); } + [[deprecated("Pass cases in std::initializer_list instead")]] StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2, StringLiteral S3, T Value) { return CasesImpl({S0, S1, S2, S3}, Value); } + [[deprecated("Pass cases in std::initializer_list instead")]] StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2, StringLiteral S3, StringLiteral S4, T Value) { return CasesImpl({S0, S1, S2, S3, S4}, Value); @@ -179,11 +181,13 @@ public: return CasesLowerImpl({S0, S1, S2}, Value); } + [[deprecated("Pass cases in std::initializer_list instead")]] StringSwitch &CasesLower(StringLiteral S0, StringLiteral S1, StringLiteral S2, StringLiteral S3, T Value) { return CasesLowerImpl({S0, S1, S2, S3}, Value); } + [[deprecated("Pass cases in std::initializer_list instead")]] StringSwitch &CasesLower(StringLiteral S0, StringLiteral S1, StringLiteral S2, StringLiteral S3, StringLiteral S4, T Value) { return CasesLowerImpl({S0, S1, S2, S3, S4}, Value); diff --git a/llvm/include/llvm/ADT/bit.h b/llvm/include/llvm/ADT/bit.h index 8b60b69..5971b75 100644 --- a/llvm/include/llvm/ADT/bit.h +++ b/llvm/include/llvm/ADT/bit.h @@ -151,7 +151,7 @@ template <typename T, typename = std::enable_if_t<std::is_unsigned_v<T>>> /// Count the number of set bits in a value. /// Ex. popcount(0xF000F000) = 8 /// Returns 0 if Value is zero. -template <typename T> [[nodiscard]] inline int popcount(T Value) noexcept { +template <typename T> [[nodiscard]] constexpr int popcount(T Value) noexcept { static_assert(std::is_unsigned_v<T>, "T must be an unsigned integer type"); static_assert(sizeof(T) <= 8, "T must be 8 bytes or less"); @@ -178,7 +178,23 @@ template <typename T> [[nodiscard]] inline int popcount(T Value) noexcept { } /// Count number of 0's from the least significant bit to the most -/// stopping at the first 1. +/// stopping at the first 1. +/// +/// A constexpr version of countr_zero. +/// +/// Only unsigned integral types are allowed. +/// +/// Returns std::numeric_limits<T>::digits on an input of 0. +template <typename T> [[nodiscard]] constexpr int countr_zero_constexpr(T Val) { + static_assert(std::is_unsigned_v<T>, + "Only unsigned integral types are allowed."); + // "(Val & -Val) - 1" generates a mask with all bits set up to (but not + // including) the least significant set bit of Val. + return llvm::popcount(static_cast<std::make_unsigned_t<T>>((Val & -Val) - 1)); +} + +/// Count number of 0's from the least significant bit to the most +/// stopping at the first 1. /// /// Only unsigned integral types are allowed. /// @@ -208,9 +224,7 @@ template <typename T> [[nodiscard]] int countr_zero(T Val) { #endif } - // Fallback to popcount. "(Val & -Val) - 1" is a bitmask with all bits below - // the least significant 1 set. - return llvm::popcount(static_cast<std::make_unsigned_t<T>>((Val & -Val) - 1)); + return countr_zero_constexpr(Val); } /// Count number of 0's from the most significant bit to the least diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h index 5039a3f..211c026 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.h +++ b/llvm/include/llvm/BinaryFormat/Dwarf.h @@ -1128,6 +1128,9 @@ struct FormParams { uint8_t getDwarfOffsetByteSize() const { return dwarf::getDwarfOffsetByteSize(Format); } + inline uint64_t getDwarfMaxOffset() const { + return (getDwarfOffsetByteSize() == 4) ? UINT32_MAX : UINT64_MAX; + } explicit operator bool() const { return Version && AddrSize; } }; diff --git a/llvm/include/llvm/CAS/OnDiskDataAllocator.h b/llvm/include/llvm/CAS/OnDiskDataAllocator.h index 2809df8..b7099dc 100644 --- a/llvm/include/llvm/CAS/OnDiskDataAllocator.h +++ b/llvm/include/llvm/CAS/OnDiskDataAllocator.h @@ -64,7 +64,7 @@ public: /// \returns the buffer that was allocated at \p create time, with size /// \p UserHeaderSize. - MutableArrayRef<uint8_t> getUserHeader(); + MutableArrayRef<uint8_t> getUserHeader() const; size_t size() const; size_t capacity() const; diff --git a/llvm/include/llvm/CAS/OnDiskGraphDB.h b/llvm/include/llvm/CAS/OnDiskGraphDB.h new file mode 100644 index 0000000..83017a6 --- /dev/null +++ b/llvm/include/llvm/CAS/OnDiskGraphDB.h @@ -0,0 +1,469 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This declares OnDiskGraphDB, an ondisk CAS database with a fixed length +/// hash. This is the class that implements the database storage scheme without +/// exposing the hashing algorithm. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CAS_ONDISKGRAPHDB_H +#define LLVM_CAS_ONDISKGRAPHDB_H + +#include "llvm/ADT/PointerUnion.h" +#include "llvm/CAS/OnDiskDataAllocator.h" +#include "llvm/CAS/OnDiskTrieRawHashMap.h" + +namespace llvm::cas::ondisk { + +/// Standard 8 byte reference inside OnDiskGraphDB. +class InternalRef { +public: + FileOffset getFileOffset() const { return FileOffset(Data); } + uint64_t getRawData() const { return Data; } + + static InternalRef getFromRawData(uint64_t Data) { return InternalRef(Data); } + static InternalRef getFromOffset(FileOffset Offset) { + return InternalRef(Offset.get()); + } + + friend bool operator==(InternalRef LHS, InternalRef RHS) { + return LHS.Data == RHS.Data; + } + +private: + InternalRef(FileOffset Offset) : Data((uint64_t)Offset.get()) {} + InternalRef(uint64_t Data) : Data(Data) {} + uint64_t Data; +}; + +/// Compact 4 byte reference inside OnDiskGraphDB for smaller references. +class InternalRef4B { +public: + FileOffset getFileOffset() const { return FileOffset(Data); } + uint32_t getRawData() const { return Data; } + + /// Shrink to 4B reference. + static std::optional<InternalRef4B> tryToShrink(InternalRef Ref) { + uint64_t Offset = Ref.getRawData(); + if (Offset > UINT32_MAX) + return std::nullopt; + return InternalRef4B(Offset); + } + + operator InternalRef() const { + return InternalRef::getFromOffset(getFileOffset()); + } + +private: + friend class InternalRef; + InternalRef4B(uint32_t Data) : Data(Data) {} + uint32_t Data; +}; + +/// Array of internal node references. +class InternalRefArrayRef { +public: + size_t size() const { return Size; } + bool empty() const { return !Size; } + + class iterator + : public iterator_facade_base<iterator, std::random_access_iterator_tag, + const InternalRef> { + public: + bool operator==(const iterator &RHS) const { return I == RHS.I; } + InternalRef operator*() const { + if (auto *Ref = dyn_cast<const InternalRef *>(I)) + return *Ref; + return InternalRef(*cast<const InternalRef4B *>(I)); + } + bool operator<(const iterator &RHS) const { + assert(isa<const InternalRef *>(I) == isa<const InternalRef *>(RHS.I)); + if (auto *Ref = dyn_cast<const InternalRef *>(I)) + return Ref < cast<const InternalRef *>(RHS.I); + return cast<const InternalRef4B *>(I) - + cast<const InternalRef4B *>(RHS.I); + } + ptrdiff_t operator-(const iterator &RHS) const { + assert(isa<const InternalRef *>(I) == isa<const InternalRef *>(RHS.I)); + if (auto *Ref = dyn_cast<const InternalRef *>(I)) + return Ref - cast<const InternalRef *>(RHS.I); + return cast<const InternalRef4B *>(I) - + cast<const InternalRef4B *>(RHS.I); + } + iterator &operator+=(ptrdiff_t N) { + if (auto *Ref = dyn_cast<const InternalRef *>(I)) + I = Ref + N; + else + I = cast<const InternalRef4B *>(I) + N; + return *this; + } + iterator &operator-=(ptrdiff_t N) { + if (auto *Ref = dyn_cast<const InternalRef *>(I)) + I = Ref - N; + else + I = cast<const InternalRef4B *>(I) - N; + return *this; + } + InternalRef operator[](ptrdiff_t N) const { return *(this->operator+(N)); } + + iterator() = default; + + uint64_t getOpaqueData() const { return uintptr_t(I.getOpaqueValue()); } + + static iterator fromOpaqueData(uint64_t Opaque) { + return iterator( + PointerUnion<const InternalRef *, + const InternalRef4B *>::getFromOpaqueValue((void *) + Opaque)); + } + + private: + friend class InternalRefArrayRef; + explicit iterator( + PointerUnion<const InternalRef *, const InternalRef4B *> I) + : I(I) {} + PointerUnion<const InternalRef *, const InternalRef4B *> I; + }; + + bool operator==(const InternalRefArrayRef &RHS) const { + return size() == RHS.size() && std::equal(begin(), end(), RHS.begin()); + } + + iterator begin() const { return iterator(Begin); } + iterator end() const { return begin() + Size; } + + /// Array accessor. + InternalRef operator[](ptrdiff_t N) const { return begin()[N]; } + + bool is4B() const { return isa<const InternalRef4B *>(Begin); } + bool is8B() const { return isa<const InternalRef *>(Begin); } + + ArrayRef<uint8_t> getBuffer() const { + if (is4B()) { + auto *B = cast<const InternalRef4B *>(Begin); + return ArrayRef((const uint8_t *)B, sizeof(InternalRef4B) * Size); + } + auto *B = cast<const InternalRef *>(Begin); + return ArrayRef((const uint8_t *)B, sizeof(InternalRef) * Size); + } + + InternalRefArrayRef(std::nullopt_t = std::nullopt) { + // This is useful so that all the casts in the \p iterator functions can + // operate without needing to check for a null value. + static InternalRef PlaceHolder = InternalRef::getFromRawData(0); + Begin = &PlaceHolder; + } + + InternalRefArrayRef(ArrayRef<InternalRef> Refs) + : Begin(Refs.begin()), Size(Refs.size()) {} + + InternalRefArrayRef(ArrayRef<InternalRef4B> Refs) + : Begin(Refs.begin()), Size(Refs.size()) {} + +private: + PointerUnion<const InternalRef *, const InternalRef4B *> Begin; + size_t Size = 0; +}; + +/// Reference to a node. The node's data may not be stored in the database. +/// An \p ObjectID instance can only be used with the \p OnDiskGraphDB instance +/// it came from. \p ObjectIDs from different \p OnDiskGraphDB instances are not +/// comparable. +class ObjectID { +public: + uint64_t getOpaqueData() const { return Opaque; } + + static ObjectID fromOpaqueData(uint64_t Opaque) { return ObjectID(Opaque); } + + friend bool operator==(const ObjectID &LHS, const ObjectID &RHS) { + return LHS.Opaque == RHS.Opaque; + } + friend bool operator!=(const ObjectID &LHS, const ObjectID &RHS) { + return !(LHS == RHS); + } + +private: + explicit ObjectID(uint64_t Opaque) : Opaque(Opaque) {} + uint64_t Opaque; +}; + +/// Handle for a loaded node object. +class ObjectHandle { +public: + explicit ObjectHandle(uint64_t Opaque) : Opaque(Opaque) {} + uint64_t getOpaqueData() const { return Opaque; } + + static ObjectHandle fromFileOffset(FileOffset Offset); + static ObjectHandle fromMemory(uintptr_t Ptr); + + friend bool operator==(const ObjectHandle &LHS, const ObjectHandle &RHS) { + return LHS.Opaque == RHS.Opaque; + } + friend bool operator!=(const ObjectHandle &LHS, const ObjectHandle &RHS) { + return !(LHS == RHS); + } + +private: + uint64_t Opaque; +}; + +/// Iterator for ObjectID. +class object_refs_iterator + : public iterator_facade_base<object_refs_iterator, + std::random_access_iterator_tag, ObjectID> { +public: + bool operator==(const object_refs_iterator &RHS) const { return I == RHS.I; } + ObjectID operator*() const { + return ObjectID::fromOpaqueData((*I).getRawData()); + } + bool operator<(const object_refs_iterator &RHS) const { return I < RHS.I; } + ptrdiff_t operator-(const object_refs_iterator &RHS) const { + return I - RHS.I; + } + object_refs_iterator &operator+=(ptrdiff_t N) { + I += N; + return *this; + } + object_refs_iterator &operator-=(ptrdiff_t N) { + I -= N; + return *this; + } + ObjectID operator[](ptrdiff_t N) const { return *(this->operator+(N)); } + + object_refs_iterator() = default; + object_refs_iterator(InternalRefArrayRef::iterator I) : I(I) {} + + uint64_t getOpaqueData() const { return I.getOpaqueData(); } + + static object_refs_iterator fromOpaqueData(uint64_t Opaque) { + return InternalRefArrayRef::iterator::fromOpaqueData(Opaque); + } + +private: + InternalRefArrayRef::iterator I; +}; + +using object_refs_range = llvm::iterator_range<object_refs_iterator>; + +/// On-disk CAS nodes database, independent of a particular hashing algorithm. +class OnDiskGraphDB { +public: + /// Associate data & references with a particular object ID. If there is + /// already a record for this object the operation is a no-op. \param ID the + /// object ID to associate the data & references with. \param Refs references + /// \param Data data buffer. + Error store(ObjectID ID, ArrayRef<ObjectID> Refs, ArrayRef<char> Data); + + /// \returns \p nullopt if the object associated with \p Ref does not exist. + Expected<std::optional<ObjectHandle>> load(ObjectID Ref); + + /// \returns the hash bytes digest for the object reference. + ArrayRef<uint8_t> getDigest(ObjectID Ref) const { + // ObjectID should be valid to fetch Digest. + return cantFail(getDigest(getInternalRef(Ref))); + } + + /// Form a reference for the provided hash. The reference can be used as part + /// of a CAS object even if it's not associated with an object yet. + Expected<ObjectID> getReference(ArrayRef<uint8_t> Hash); + + /// Get an existing reference to the object \p Digest. + /// + /// Returns \p nullopt if the object is not stored in this CAS. + std::optional<ObjectID> getExistingReference(ArrayRef<uint8_t> Digest); + + /// Check whether the object associated with \p Ref is stored in the CAS. + /// Note that this function will fault-in according to the policy. + Expected<bool> isMaterialized(ObjectID Ref); + + /// Check whether the object associated with \p Ref is stored in the CAS. + /// Note that this function does not fault-in. + bool containsObject(ObjectID Ref) const { + return containsObject(Ref, /*CheckUpstream=*/true); + } + + /// \returns the data part of the provided object handle. + ArrayRef<char> getObjectData(ObjectHandle Node) const; + + /// \returns the object referenced by the provided object handle. + object_refs_range getObjectRefs(ObjectHandle Node) const { + InternalRefArrayRef Refs = getInternalRefs(Node); + return make_range(Refs.begin(), Refs.end()); + } + + /// \returns Total size of stored objects. + /// + /// NOTE: There's a possibility that the returned size is not including a + /// large object if the process crashed right at the point of inserting it. + size_t getStorageSize() const; + + /// \returns The precentage of space utilization of hard space limits. + /// + /// Return value is an integer between 0 and 100 for percentage. + unsigned getHardStorageLimitUtilization() const; + + void print(raw_ostream &OS) const; + + /// Hashing function type for validation. + using HashingFuncT = function_ref<void( + ArrayRef<ArrayRef<uint8_t>>, ArrayRef<char>, SmallVectorImpl<uint8_t> &)>; + + /// Validate the OnDiskGraphDB. + /// + /// \param Deep if true, rehash all the objects to ensure no data + /// corruption in stored objects, otherwise just validate the structure of + /// CAS database. + /// \param Hasher is the hashing function used for objects inside CAS. + Error validate(bool Deep, HashingFuncT Hasher) const; + + /// How to fault-in nodes if an upstream database is used. + enum class FaultInPolicy { + /// Copy only the requested node. + SingleNode, + /// Copy the the entire graph of a node. + FullTree, + }; + + /// Open the on-disk store from a directory. + /// + /// \param Path directory for the on-disk store. The directory will be created + /// if it doesn't exist. + /// \param HashName Identifier name for the hashing algorithm that is going to + /// be used. + /// \param HashByteSize Size for the object digest hash bytes. + /// \param UpstreamDB Optional on-disk store to be used for faulting-in nodes + /// if they don't exist in the primary store. The upstream store is only used + /// for reading nodes, new nodes are only written to the primary store. + /// \param Policy If \p UpstreamDB is provided, controls how nodes are copied + /// to primary store. This is recorded at creation time and subsequent opens + /// need to pass the same policy otherwise the \p open will fail. + static Expected<std::unique_ptr<OnDiskGraphDB>> + open(StringRef Path, StringRef HashName, unsigned HashByteSize, + std::unique_ptr<OnDiskGraphDB> UpstreamDB = nullptr, + FaultInPolicy Policy = FaultInPolicy::FullTree); + + ~OnDiskGraphDB(); + +private: + /// Forward declaration for a proxy for an ondisk index record. + struct IndexProxy; + + enum class ObjectPresence { + Missing, + InPrimaryDB, + OnlyInUpstreamDB, + }; + + /// Check if object exists and if it is on upstream only. + Expected<ObjectPresence> getObjectPresence(ObjectID Ref, + bool CheckUpstream) const; + + /// \returns true if object can be found in database. + bool containsObject(ObjectID Ref, bool CheckUpstream) const { + auto Presence = getObjectPresence(Ref, CheckUpstream); + if (!Presence) { + consumeError(Presence.takeError()); + return false; + } + switch (*Presence) { + case ObjectPresence::Missing: + return false; + case ObjectPresence::InPrimaryDB: + return true; + case ObjectPresence::OnlyInUpstreamDB: + return true; + } + } + + /// When \p load is called for a node that doesn't exist, this function tries + /// to load it from the upstream store and copy it to the primary one. + Expected<std::optional<ObjectHandle>> faultInFromUpstream(ObjectID PrimaryID); + + /// Import the entire tree from upstream with \p UpstreamNode as root. + Error importFullTree(ObjectID PrimaryID, ObjectHandle UpstreamNode); + /// Import only the \param UpstreamNode. + Error importSingleNode(ObjectID PrimaryID, ObjectHandle UpstreamNode); + + /// Found the IndexProxy for the hash. + Expected<IndexProxy> indexHash(ArrayRef<uint8_t> Hash); + + /// Get path for creating standalone data file. + void getStandalonePath(StringRef FileSuffix, const IndexProxy &I, + SmallVectorImpl<char> &Path) const; + /// Create a standalone leaf file. + Error createStandaloneLeaf(IndexProxy &I, ArrayRef<char> Data); + + /// \name Helper functions for internal data structures. + /// \{ + static InternalRef getInternalRef(ObjectID Ref) { + return InternalRef::getFromRawData(Ref.getOpaqueData()); + } + + static ObjectID getExternalReference(InternalRef Ref) { + return ObjectID::fromOpaqueData(Ref.getRawData()); + } + + static ObjectID getExternalReference(const IndexProxy &I); + + static InternalRef makeInternalRef(FileOffset IndexOffset); + + Expected<ArrayRef<uint8_t>> getDigest(InternalRef Ref) const; + + ArrayRef<uint8_t> getDigest(const IndexProxy &I) const; + + Expected<IndexProxy> getIndexProxyFromRef(InternalRef Ref) const; + + IndexProxy + getIndexProxyFromPointer(OnDiskTrieRawHashMap::ConstOnDiskPtr P) const; + + InternalRefArrayRef getInternalRefs(ObjectHandle Node) const; + /// \} + + /// Get the atomic variable that keeps track of the standalone data storage + /// size. + std::atomic<uint64_t> &standaloneStorageSize() const; + + /// Increase the standalone data size. + void recordStandaloneSizeIncrease(size_t SizeIncrease); + /// Get the standalone data size. + uint64_t getStandaloneStorageSize() const; + + // Private constructor. + OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index, + OnDiskDataAllocator DataPool, + std::unique_ptr<OnDiskGraphDB> UpstreamDB, + FaultInPolicy Policy); + + /// Mapping from hash to object reference. + /// + /// Data type is TrieRecord. + OnDiskTrieRawHashMap Index; + + /// Storage for most objects. + /// + /// Data type is DataRecordHandle. + OnDiskDataAllocator DataPool; + + /// A StandaloneDataMap. + void *StandaloneData = nullptr; + + /// Path to the root directory. + std::string RootPath; + + /// Optional on-disk store to be used for faulting-in nodes. + std::unique_ptr<OnDiskGraphDB> UpstreamDB; + + /// The policy used to fault in data from upstream. + FaultInPolicy FIPolicy; +}; + +} // namespace llvm::cas::ondisk + +#endif // LLVM_CAS_ONDISKGRAPHDB_H diff --git a/llvm/include/llvm/CAS/OnDiskKeyValueDB.h b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h new file mode 100644 index 0000000..b762518 --- /dev/null +++ b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h @@ -0,0 +1,82 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This declares OnDiskKeyValueDB, a key value storage database of fixed size +/// key and value. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CAS_ONDISKKEYVALUEDB_H +#define LLVM_CAS_ONDISKKEYVALUEDB_H + +#include "llvm/CAS/OnDiskTrieRawHashMap.h" + +namespace llvm::cas::ondisk { + +/// An on-disk key-value data store with the following properties: +/// * Keys are fixed length binary hashes with expected normal distribution. +/// * Values are buffers of the same size, specified at creation time. +/// * The value of a key cannot be changed once it is set. +/// * The value buffers returned from a key lookup have 8-byte alignment. +class OnDiskKeyValueDB { +public: + /// Associate a value with a key. + /// + /// \param Key the hash bytes for the key + /// \param Value the value bytes, same size as \p ValueSize parameter of + /// \p open call. + /// + /// \returns the value associated with the \p Key. It may be different than + /// \p Value if another value is already associated with this key. + Expected<ArrayRef<char>> put(ArrayRef<uint8_t> Key, ArrayRef<char> Value); + + /// \returns the value associated with the \p Key, or \p std::nullopt if the + /// key does not exist. + Expected<std::optional<ArrayRef<char>>> get(ArrayRef<uint8_t> Key); + + /// \returns Total size of stored data. + size_t getStorageSize() const { return Cache.size(); } + + /// \returns The precentage of space utilization of hard space limits. + /// + /// Return value is an integer between 0 and 100 for percentage. + unsigned getHardStorageLimitUtilization() const { + return Cache.size() * 100ULL / Cache.capacity(); + } + + /// Open the on-disk store from a directory. + /// + /// \param Path directory for the on-disk store. The directory will be created + /// if it doesn't exist. + /// \param HashName Identifier name for the hashing algorithm that is going to + /// be used. + /// \param KeySize Size for the key hash bytes. + /// \param ValueName Identifier name for the values. + /// \param ValueSize Size for the value bytes. + static Expected<std::unique_ptr<OnDiskKeyValueDB>> + open(StringRef Path, StringRef HashName, unsigned KeySize, + StringRef ValueName, size_t ValueSize); + + using CheckValueT = + function_ref<Error(FileOffset Offset, ArrayRef<char> Data)>; + /// Validate the storage with a callback \p CheckValue to check the stored + /// value. + Error validate(CheckValueT CheckValue) const; + +private: + OnDiskKeyValueDB(size_t ValueSize, OnDiskTrieRawHashMap Cache) + : ValueSize(ValueSize), Cache(std::move(Cache)) {} + + const size_t ValueSize; + OnDiskTrieRawHashMap Cache; +}; + +} // namespace llvm::cas::ondisk + +#endif // LLVM_CAS_ONDISKKEYVALUEDB_H diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h index 1a01fa6..87b9520 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h @@ -541,6 +541,14 @@ struct DeviceT { std::tuple<OPT(DeviceModifier), DeviceDescription> t; }; +// [6.0:362] +template <typename T, typename I, typename E> // +struct DeviceSafesyncT { + using Requires = E; + using WrapperTrait = std::true_type; + OPT(Requires) v; +}; + // V5.2: [13.1] `device_type` clause template <typename T, typename I, typename E> // struct DeviceTypeT { @@ -1332,14 +1340,15 @@ using WrapperClausesT = std::variant< AtomicDefaultMemOrderT<T, I, E>, AtT<T, I, E>, BindT<T, I, E>, CollapseT<T, I, E>, ContainsT<T, I, E>, CopyinT<T, I, E>, CopyprivateT<T, I, E>, DefaultT<T, I, E>, DestroyT<T, I, E>, - DetachT<T, I, E>, DeviceTypeT<T, I, E>, DynamicAllocatorsT<T, I, E>, - EnterT<T, I, E>, ExclusiveT<T, I, E>, FailT<T, I, E>, FilterT<T, I, E>, - FinalT<T, I, E>, FirstprivateT<T, I, E>, HasDeviceAddrT<T, I, E>, - HintT<T, I, E>, HoldsT<T, I, E>, InclusiveT<T, I, E>, IndirectT<T, I, E>, - InitializerT<T, I, E>, IsDevicePtrT<T, I, E>, LinkT<T, I, E>, - MessageT<T, I, E>, NocontextT<T, I, E>, NontemporalT<T, I, E>, - NovariantsT<T, I, E>, NumTeamsT<T, I, E>, NumThreadsT<T, I, E>, - OrderedT<T, I, E>, PartialT<T, I, E>, PriorityT<T, I, E>, PrivateT<T, I, E>, + DetachT<T, I, E>, DeviceSafesyncT<T, I, E>, DeviceTypeT<T, I, E>, + DynamicAllocatorsT<T, I, E>, EnterT<T, I, E>, ExclusiveT<T, I, E>, + FailT<T, I, E>, FilterT<T, I, E>, FinalT<T, I, E>, FirstprivateT<T, I, E>, + HasDeviceAddrT<T, I, E>, HintT<T, I, E>, HoldsT<T, I, E>, + InclusiveT<T, I, E>, IndirectT<T, I, E>, InitializerT<T, I, E>, + IsDevicePtrT<T, I, E>, LinkT<T, I, E>, MessageT<T, I, E>, + NocontextT<T, I, E>, NontemporalT<T, I, E>, NovariantsT<T, I, E>, + NumTeamsT<T, I, E>, NumThreadsT<T, I, E>, OrderedT<T, I, E>, + PartialT<T, I, E>, PriorityT<T, I, E>, PrivateT<T, I, E>, ProcBindT<T, I, E>, ReverseOffloadT<T, I, E>, SafelenT<T, I, E>, SelfMapsT<T, I, E>, SeverityT<T, I, E>, SharedT<T, I, E>, SimdlenT<T, I, E>, SizesT<T, I, E>, PermutationT<T, I, E>, ThreadLimitT<T, I, E>, diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index edcf7a9..61a1a05 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -163,6 +163,10 @@ def OMPC_Device : Clause<[Spelling<"device">]> { let clangClass = "OMPDeviceClause"; let flangClass = "OmpDeviceClause"; } +def OMPC_DeviceSafesync : Clause<[Spelling<"device_safesync">]> { + let flangClass = "OmpDeviceSafesyncClause"; + let isValueOptional = true; +} def OMPC_DeviceType : Clause<[Spelling<"device_type">]> { let flangClass = "OmpDeviceTypeClause"; } @@ -1018,6 +1022,7 @@ def OMP_Requires : Directive<[Spelling<"requires">]> { let allowedOnceClauses = [ VersionedClause<OMPC_UnifiedAddress>, VersionedClause<OMPC_UnifiedSharedMemory>, + VersionedClause<OMPC_DeviceSafesync, 60>, // OpenMP 5.2 Spec: If an implementation is not supporting a requirement // (reverse offload in this case) then it should give compile-time error // termination. diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index b0269ee..b81edc3 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -522,7 +522,7 @@ let TargetPrefix = "aarch64" in { def int_aarch64_neon_vcmla_rot90 : AdvSIMD_3VectorArg_Intrinsic; def int_aarch64_neon_vcmla_rot180 : AdvSIMD_3VectorArg_Intrinsic; def int_aarch64_neon_vcmla_rot270 : AdvSIMD_3VectorArg_Intrinsic; - + // FP8 fscale def int_aarch64_neon_fp8_fscale : DefaultAttrsIntrinsic< [llvm_anyvector_ty], @@ -1467,7 +1467,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". LLVMSubdivide2VectorType<0>, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>]>; - + class SVE2_1VectorArgIndexed_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, @@ -1482,7 +1482,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>; - + class SVE2_1VectorArg_Pred_Intrinsic : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [llvm_anyvector_ty], @@ -1492,7 +1492,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [llvm_anyvector_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>; - + class SVE2_Pred_1VectorArgIndexed_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, @@ -3353,11 +3353,11 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[llvm_nxv8bf16_ty], [llvm_nxv4f32_ty, llvm_nxv4f32_ty], [IntrNoMem]>; - + class SVE2_CVT_WIDENING_VG2_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [LLVMSubdivide2VectorType<0>], [IntrNoMem]>; - + class SVE2_CVT_VG4_SINGLE_Intrinsic : DefaultAttrsIntrinsic<[LLVMSubdivide4VectorType<0>], @@ -3740,7 +3740,7 @@ let TargetPrefix = "aarch64" in { llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrInaccessibleMemOnly, IntrWriteMem]>; - + class SME2_Add_Sub_Write_VG4_Multi_Multi_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, @@ -3887,7 +3887,7 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_luti4_lane_zt : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly, IntrReadMem]>; - + // Lookup table expand two registers // def int_aarch64_sme_luti2_lane_zt_x2 @@ -3896,7 +3896,7 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_luti4_lane_zt_x2 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly, IntrReadMem]>; - + // // Lookup table expand four registers // @@ -3914,7 +3914,7 @@ let TargetPrefix = "aarch64" in { [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrReadMem]>; - + // // Register scaling // @@ -3962,7 +3962,7 @@ def int_aarch64_sve_extq : AdvSIMD_2VectorArgIndexed_Intrinsic; // // SVE2.1 - Move predicate to/from vector // -def int_aarch64_sve_pmov_to_pred_lane : SVE2_1VectorArgIndexed_Pred_Intrinsic; +def int_aarch64_sve_pmov_to_pred_lane : SVE2_1VectorArgIndexed_Pred_Intrinsic; def int_aarch64_sve_pmov_to_pred_lane_zero : SVE2_1VectorArg_Pred_Intrinsic; @@ -4004,10 +4004,10 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_anyvector_ty, LLVMMatchType<0>], [IntrReadMem, IntrInaccessibleMemOnly]>; - + def int_aarch64_sve_fp8_cvtn : SVE2_FP8_Narrow_Cvt; def int_aarch64_sve_fp8_cvtnb : SVE2_FP8_Narrow_Cvt; - + def int_aarch64_sve_fp8_cvtnt : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_nxv16i8_ty, llvm_anyvector_ty, LLVMMatchType<0>], @@ -4019,32 +4019,32 @@ let TargetPrefix = "aarch64" in { [LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty], [IntrReadMem, IntrInaccessibleMemOnly]>; - + class SVE2_FP8_FMLA_FDOT_Lane : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>; - + def int_aarch64_sve_fp8_fdot : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fdot_lane : SVE2_FP8_FMLA_FDOT_Lane; // Fused multiply-add def int_aarch64_sve_fp8_fmlalb : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlalb_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlalt : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlalt_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlallbb : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlallbb_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlallbt : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlallbt_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlalltb : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlalltb_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlalltt : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlalltt_lane : SVE2_FP8_FMLA_FDOT_Lane; diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index ded00b1..9e334d4 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2819,20 +2819,24 @@ class AMDGPULoadToLDS : "", [SDNPMemOperand]>; def int_amdgcn_load_to_lds : AMDGPULoadToLDS; -class AMDGPUGlobalLoadLDS : - ClangBuiltin<"__builtin_amdgcn_global_load_lds">, - Intrinsic < - [], - [LLVMQualPointerType<1>, // Base global pointer to load from - LLVMQualPointerType<3>, // LDS base pointer to store to - llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) - llvm_i32_ty, // imm offset (applied to both global and LDS address) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = sc0, - // bit 1 = sc1, - // bit 4 = scc)) - [IntrWillReturn, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, - ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree], - "", [SDNPMemOperand]>; +class AMDGPUGlobalLoadLDS + : ClangBuiltin<"__builtin_amdgcn_global_load_lds">, + Intrinsic< + [], + [LLVMQualPointerType<1>, // Base global pointer to load from + LLVMQualPointerType<3>, // LDS base pointer to store to + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) + llvm_i32_ty, // imm offset (applied to both global and LDS address) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = sc0, + // bit 1 = sc1, + // bit 4 = scc, + // bit 31 = volatile + // (compiler + // implemented))) + [IntrWillReturn, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, + ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, + IntrNoCallback, IntrNoFree], + "", [SDNPMemOperand]>; def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS; // This is IntrHasSideEffects because it reads from a volatile hardware register. diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h index 41232335..9bbb8a2 100644 --- a/llvm/include/llvm/Support/MathExtras.h +++ b/llvm/include/llvm/Support/MathExtras.h @@ -316,11 +316,9 @@ inline bool isShiftedMask_64(uint64_t Value, unsigned &MaskIdx, /// Valid only for positive powers of two. template <size_t kValue> constexpr size_t ConstantLog2() { static_assert(llvm::isPowerOf2_64(kValue), "Value is not a valid power of 2"); - return 1 + ConstantLog2<kValue / 2>(); + return llvm::countr_zero_constexpr(kValue); } -template <> constexpr size_t ConstantLog2<1>() { return 0; } - template <size_t kValue> LLVM_DEPRECATED("Use ConstantLog2 instead", "ConstantLog2") constexpr size_t CTLog2() { diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 20a8e1c..dc813f6 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5440,9 +5440,10 @@ static Value *simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, // ptrtoint (ptradd (Ptr, X - ptrtoint(Ptr))) -> X Value *Ptr, *X; - if (CastOpc == Instruction::PtrToInt && - match(Op, m_PtrAdd(m_Value(Ptr), - m_Sub(m_Value(X), m_PtrToInt(m_Deferred(Ptr))))) && + if ((CastOpc == Instruction::PtrToInt || CastOpc == Instruction::PtrToAddr) && + match(Op, + m_PtrAdd(m_Value(Ptr), + m_Sub(m_Value(X), m_PtrToIntOrAddr(m_Deferred(Ptr))))) && X->getType() == Ty && Ty == Q.DL.getIndexType(Ptr->getType())) return X; diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp index 1d1a5560..9a5ae2a 100644 --- a/llvm/lib/Analysis/MLInlineAdvisor.cpp +++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp @@ -324,32 +324,44 @@ void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice, FAM.invalidate(*Caller, PA); } Advice.updateCachedCallerFPI(FAM); - int64_t IRSizeAfter = - getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize); - CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize); + if (Caller == Callee) { + assert(!CalleeWasDeleted); + // We double-counted CallerAndCalleeEdges - since the caller and callee + // would be the same + assert(Advice.CallerAndCalleeEdges % 2 == 0); + CurrentIRSize += getIRSize(*Caller) - Advice.CallerIRSize; + EdgeCount += getCachedFPI(*Caller).DirectCallsToDefinedFunctions - + Advice.CallerAndCalleeEdges / 2; + // The NodeCount would stay the same. + } else { + int64_t IRSizeAfter = + getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize); + CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize); + + // We can delta-update module-wide features. We know the inlining only + // changed the caller, and maybe the callee (by deleting the latter). Nodes + // are simple to update. For edges, we 'forget' the edges that the caller + // and callee used to have before inlining, and add back what they currently + // have together. + int64_t NewCallerAndCalleeEdges = + getCachedFPI(*Caller).DirectCallsToDefinedFunctions; + + // A dead function's node is not actually removed from the call graph until + // the end of the call graph walk, but the node no longer belongs to any + // valid SCC. + if (CalleeWasDeleted) { + --NodeCount; + NodesInLastSCC.erase(CG.lookup(*Callee)); + DeadFunctions.insert(Callee); + } else { + NewCallerAndCalleeEdges += + getCachedFPI(*Callee).DirectCallsToDefinedFunctions; + } + EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges); + } if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize) ForceStop = true; - // We can delta-update module-wide features. We know the inlining only changed - // the caller, and maybe the callee (by deleting the latter). - // Nodes are simple to update. - // For edges, we 'forget' the edges that the caller and callee used to have - // before inlining, and add back what they currently have together. - int64_t NewCallerAndCalleeEdges = - getCachedFPI(*Caller).DirectCallsToDefinedFunctions; - - // A dead function's node is not actually removed from the call graph until - // the end of the call graph walk, but the node no longer belongs to any valid - // SCC. - if (CalleeWasDeleted) { - --NodeCount; - NodesInLastSCC.erase(CG.lookup(*Callee)); - DeadFunctions.insert(Callee); - } else { - NewCallerAndCalleeEdges += - getCachedFPI(*Callee).DirectCallsToDefinedFunctions; - } - EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges); assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0); } diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index c4eb838..6f7dd79 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -15474,11 +15474,26 @@ void ScalarEvolution::LoopGuards::collectFromPHI( } // Return a new SCEV that modifies \p Expr to the closest number divides by +// \p Divisor and less or equal than Expr. For now, only handle constant +// Expr. +static const SCEV *getPreviousSCEVDivisibleByDivisor(const SCEV *Expr, + const APInt &DivisorVal, + ScalarEvolution &SE) { + const APInt *ExprVal; + if (!match(Expr, m_scev_APInt(ExprVal)) || ExprVal->isNegative() || + DivisorVal.isNonPositive()) + return Expr; + APInt Rem = ExprVal->urem(DivisorVal); + // return the SCEV: Expr - Expr % Divisor + return SE.getConstant(*ExprVal - Rem); +} + +// Return a new SCEV that modifies \p Expr to the closest number divides by // \p Divisor and greater or equal than Expr. For now, only handle constant // Expr. -static const SCEV *getNextSCEVDividesByDivisor(const SCEV *Expr, - const APInt &DivisorVal, - ScalarEvolution &SE) { +static const SCEV *getNextSCEVDivisibleByDivisor(const SCEV *Expr, + const APInt &DivisorVal, + ScalarEvolution &SE) { const APInt *ExprVal; if (!match(Expr, m_scev_APInt(ExprVal)) || ExprVal->isNegative() || DivisorVal.isNonPositive()) @@ -15557,20 +15572,6 @@ void ScalarEvolution::LoopGuards::collectFromBlock( match(LHS, m_scev_APInt(C)) && C->isNonNegative(); }; - // Return a new SCEV that modifies \p Expr to the closest number divides by - // \p Divisor and less or equal than Expr. For now, only handle constant - // Expr. - auto GetPreviousSCEVDividesByDivisor = [&](const SCEV *Expr, - const APInt &DivisorVal) { - const APInt *ExprVal; - if (!match(Expr, m_scev_APInt(ExprVal)) || ExprVal->isNegative() || - DivisorVal.isNonPositive()) - return Expr; - APInt Rem = ExprVal->urem(DivisorVal); - // return the SCEV: Expr - Expr % Divisor - return SE.getConstant(*ExprVal - Rem); - }; - // Apply divisibilty by \p Divisor on MinMaxExpr with constant values, // recursively. This is done by aligning up/down the constant value to the // Divisor. @@ -15592,8 +15593,9 @@ void ScalarEvolution::LoopGuards::collectFromBlock( assert(SE.isKnownNonNegative(MinMaxLHS) && "Expected non-negative operand!"); auto *DivisibleExpr = - IsMin ? GetPreviousSCEVDividesByDivisor(MinMaxLHS, DivisorVal) - : getNextSCEVDividesByDivisor(MinMaxLHS, DivisorVal, SE); + IsMin + ? getPreviousSCEVDivisibleByDivisor(MinMaxLHS, DivisorVal, SE) + : getNextSCEVDivisibleByDivisor(MinMaxLHS, DivisorVal, SE); SmallVector<const SCEV *> Ops = { ApplyDivisibiltyOnMinMaxExpr(MinMaxRHS, Divisor), DivisibleExpr}; return SE.getMinMaxExpr(SCTy, Ops); @@ -15670,21 +15672,21 @@ void ScalarEvolution::LoopGuards::collectFromBlock( [[fallthrough]]; case CmpInst::ICMP_SLT: { RHS = SE.getMinusSCEV(RHS, One); - RHS = GetPreviousSCEVDividesByDivisor(RHS, DividesBy); + RHS = getPreviousSCEVDivisibleByDivisor(RHS, DividesBy, SE); break; } case CmpInst::ICMP_UGT: case CmpInst::ICMP_SGT: RHS = SE.getAddExpr(RHS, One); - RHS = getNextSCEVDividesByDivisor(RHS, DividesBy, SE); + RHS = getNextSCEVDivisibleByDivisor(RHS, DividesBy, SE); break; case CmpInst::ICMP_ULE: case CmpInst::ICMP_SLE: - RHS = GetPreviousSCEVDividesByDivisor(RHS, DividesBy); + RHS = getPreviousSCEVDivisibleByDivisor(RHS, DividesBy, SE); break; case CmpInst::ICMP_UGE: case CmpInst::ICMP_SGE: - RHS = getNextSCEVDividesByDivisor(RHS, DividesBy, SE); + RHS = getNextSCEVDivisibleByDivisor(RHS, DividesBy, SE); break; default: break; @@ -15738,7 +15740,7 @@ void ScalarEvolution::LoopGuards::collectFromBlock( case CmpInst::ICMP_NE: if (match(RHS, m_scev_Zero())) { const SCEV *OneAlignedUp = - getNextSCEVDividesByDivisor(One, DividesBy, SE); + getNextSCEVDivisibleByDivisor(One, DividesBy, SE); To = SE.getUMaxExpr(FromRewritten, OneAlignedUp); } else { // LHS != RHS can be rewritten as (LHS - RHS) = UMax(1, LHS - RHS), @@ -15965,7 +15967,7 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const { if (LHS > RHS) std::swap(LHS, RHS); if (NotEqual.contains({LHS, RHS})) { - const SCEV *OneAlignedUp = getNextSCEVDividesByDivisor( + const SCEV *OneAlignedUp = getNextSCEVDivisibleByDivisor( SE.getOne(S->getType()), SE.getConstantMultiple(S), SE); return SE.getUMaxExpr(OneAlignedUp, S); } diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt index bca39b6..a2f8c49 100644 --- a/llvm/lib/CAS/CMakeLists.txt +++ b/llvm/lib/CAS/CMakeLists.txt @@ -8,6 +8,8 @@ add_llvm_component_library(LLVMCAS ObjectStore.cpp OnDiskCommon.cpp OnDiskDataAllocator.cpp + OnDiskGraphDB.cpp + OnDiskKeyValueDB.cpp OnDiskTrieRawHashMap.cpp ADDITIONAL_HEADER_DIRS diff --git a/llvm/lib/CAS/OnDiskCommon.cpp b/llvm/lib/CAS/OnDiskCommon.cpp index 25aa06b..281bde9 100644 --- a/llvm/lib/CAS/OnDiskCommon.cpp +++ b/llvm/lib/CAS/OnDiskCommon.cpp @@ -7,9 +7,10 @@ //===----------------------------------------------------------------------===// #include "OnDiskCommon.h" -#include "llvm/Config/config.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Process.h" +#include <mutex> #include <thread> #if __has_include(<sys/file.h>) @@ -25,8 +26,44 @@ #include <fcntl.h> #endif +#if __has_include(<sys/mount.h>) +#include <sys/mount.h> // statfs +#endif + using namespace llvm; +static uint64_t OnDiskCASMaxMappingSize = 0; + +Expected<std::optional<uint64_t>> cas::ondisk::getOverriddenMaxMappingSize() { + static std::once_flag Flag; + Error Err = Error::success(); + std::call_once(Flag, [&Err] { + ErrorAsOutParameter EAO(&Err); + constexpr const char *EnvVar = "LLVM_CAS_MAX_MAPPING_SIZE"; + auto Value = sys::Process::GetEnv(EnvVar); + if (!Value) + return; + + uint64_t Size; + if (StringRef(*Value).getAsInteger(/*auto*/ 0, Size)) + Err = createStringError(inconvertibleErrorCode(), + "invalid value for %s: expected integer", EnvVar); + OnDiskCASMaxMappingSize = Size; + }); + + if (Err) + return std::move(Err); + + if (OnDiskCASMaxMappingSize == 0) + return std::nullopt; + + return OnDiskCASMaxMappingSize; +} + +void cas::ondisk::setMaxMappingSize(uint64_t Size) { + OnDiskCASMaxMappingSize = Size; +} + std::error_code cas::ondisk::lockFileThreadSafe(int FD, sys::fs::LockKind Kind) { #if HAVE_FLOCK @@ -125,3 +162,20 @@ Expected<size_t> cas::ondisk::preallocateFileTail(int FD, size_t CurrentSize, return NewSize; // Pretend it worked. #endif } + +bool cas::ondisk::useSmallMappingSize(const Twine &P) { + // Add exceptions to use small database file here. +#if defined(__APPLE__) && __has_include(<sys/mount.h>) + // macOS tmpfs does not support sparse tails. + SmallString<128> PathStorage; + StringRef Path = P.toNullTerminatedStringRef(PathStorage); + struct statfs StatFS; + if (statfs(Path.data(), &StatFS) != 0) + return false; + + if (strcmp(StatFS.f_fstypename, "tmpfs") == 0) + return true; +#endif + // Default to use regular database file. + return false; +} diff --git a/llvm/lib/CAS/OnDiskCommon.h b/llvm/lib/CAS/OnDiskCommon.h index 8b79ffe..ac00662 100644 --- a/llvm/lib/CAS/OnDiskCommon.h +++ b/llvm/lib/CAS/OnDiskCommon.h @@ -12,9 +12,31 @@ #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include <chrono> +#include <optional> namespace llvm::cas::ondisk { +/// The version for all the ondisk database files. It needs to be bumped when +/// compatibility breaking changes are introduced. +constexpr StringLiteral CASFormatVersion = "v1"; + +/// Retrieves an overridden maximum mapping size for CAS files, if any, +/// speicified by LLVM_CAS_MAX_MAPPING_SIZE in the environment or set by +/// `setMaxMappingSize()`. If the value from environment is unreadable, returns +/// an error. +Expected<std::optional<uint64_t>> getOverriddenMaxMappingSize(); + +/// Set MaxMappingSize for ondisk CAS. This function is not thread-safe and +/// should be set before creaing any ondisk CAS and does not affect CAS already +/// created. Set value 0 to use default size. +void setMaxMappingSize(uint64_t Size); + +/// Whether to use a small file mapping for ondisk databases created in \p Path. +/// +/// For some file system that doesn't support sparse file, use a smaller file +/// mapping to avoid consuming too much disk space on creation. +bool useSmallMappingSize(const Twine &Path); + /// Thread-safe alternative to \c sys::fs::lockFile. This does not support all /// the platforms that \c sys::fs::lockFile does, so keep it in the CAS library /// for now. diff --git a/llvm/lib/CAS/OnDiskDataAllocator.cpp b/llvm/lib/CAS/OnDiskDataAllocator.cpp index 13bbd66..9c68bc4 100644 --- a/llvm/lib/CAS/OnDiskDataAllocator.cpp +++ b/llvm/lib/CAS/OnDiskDataAllocator.cpp @@ -185,7 +185,7 @@ Expected<ArrayRef<char>> OnDiskDataAllocator::get(FileOffset Offset, return ArrayRef<char>{Impl->File.getRegion().data() + Offset.get(), Size}; } -MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() { +MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() const { return Impl->Store.getUserHeader(); } @@ -221,7 +221,9 @@ Expected<ArrayRef<char>> OnDiskDataAllocator::get(FileOffset Offset, "OnDiskDataAllocator is not supported"); } -MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() { return {}; } +MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() const { + return {}; +} size_t OnDiskDataAllocator::size() const { return 0; } size_t OnDiskDataAllocator::capacity() const { return 0; } diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp new file mode 100644 index 0000000..72bb98c --- /dev/null +++ b/llvm/lib/CAS/OnDiskGraphDB.cpp @@ -0,0 +1,1755 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file implements OnDiskGraphDB, an on-disk CAS nodes database, +/// independent of a particular hashing algorithm. It only needs to be +/// configured for the hash size and controls the schema of the storage. +/// +/// OnDiskGraphDB defines: +/// +/// - How the data is stored inside database, either as a standalone file, or +/// allocated inside a datapool. +/// - How references to other objects inside the same database is stored. They +/// are stored as internal references, instead of full hash value to save +/// space. +/// - How to chain databases together and import objects from upstream +/// databases. +/// +/// Here's a top-level description of the current layout: +/// +/// - db/index.<version>: a file for the "index" table, named by \a +/// IndexTableName and managed by \a TrieRawHashMap. The contents are 8B +/// that are accessed atomically, describing the object kind and where/how +/// it's stored (including an optional file offset). See \a TrieRecord for +/// more details. +/// - db/data.<version>: a file for the "data" table, named by \a +/// DataPoolTableName and managed by \a DataStore. New objects within +/// TrieRecord::MaxEmbeddedSize are inserted here as \a +/// TrieRecord::StorageKind::DataPool. +/// - db/obj.<offset>.<version>: a file storing an object outside the main +/// "data" table, named by its offset into the "index" table, with the +/// format of \a TrieRecord::StorageKind::Standalone. +/// - db/leaf.<offset>.<version>: a file storing a leaf node outside the +/// main "data" table, named by its offset into the "index" table, with +/// the format of \a TrieRecord::StorageKind::StandaloneLeaf. +/// - db/leaf+0.<offset>.<version>: a file storing a null-terminated leaf object +/// outside the main "data" table, named by its offset into the "index" table, +/// with the format of \a TrieRecord::StorageKind::StandaloneLeaf0. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/OnDiskGraphDB.h" +#include "OnDiskCommon.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CAS/OnDiskDataAllocator.h" +#include "llvm/CAS/OnDiskTrieRawHashMap.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include <atomic> +#include <mutex> +#include <optional> + +#define DEBUG_TYPE "on-disk-cas" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +static constexpr StringLiteral IndexTableName = "llvm.cas.index"; +static constexpr StringLiteral DataPoolTableName = "llvm.cas.data"; + +static constexpr StringLiteral IndexFilePrefix = "index."; +static constexpr StringLiteral DataPoolFilePrefix = "data."; + +static constexpr StringLiteral FilePrefixObject = "obj."; +static constexpr StringLiteral FilePrefixLeaf = "leaf."; +static constexpr StringLiteral FilePrefixLeaf0 = "leaf+0."; + +static Error createCorruptObjectError(Expected<ArrayRef<uint8_t>> ID) { + if (!ID) + return ID.takeError(); + + return createStringError(llvm::errc::invalid_argument, + "corrupt object '" + toHex(*ID) + "'"); +} + +namespace { + +/// Trie record data: 8 bytes, atomic<uint64_t> +/// - 1-byte: StorageKind +/// - 7-bytes: DataStoreOffset (offset into referenced file) +class TrieRecord { +public: + enum class StorageKind : uint8_t { + /// Unknown object. + Unknown = 0, + + /// data.vX: main pool, full DataStore record. + DataPool = 1, + + /// obj.<TrieRecordOffset>.vX: standalone, with a full DataStore record. + Standalone = 10, + + /// leaf.<TrieRecordOffset>.vX: standalone, just the data. File contents + /// exactly the data content and file size matches the data size. No refs. + StandaloneLeaf = 11, + + /// leaf+0.<TrieRecordOffset>.vX: standalone, just the data plus an + /// extra null character ('\0'). File size is 1 bigger than the data size. + /// No refs. + StandaloneLeaf0 = 12, + }; + + static StringRef getStandaloneFilePrefix(StorageKind SK) { + switch (SK) { + default: + llvm_unreachable("Expected standalone storage kind"); + case TrieRecord::StorageKind::Standalone: + return FilePrefixObject; + case TrieRecord::StorageKind::StandaloneLeaf: + return FilePrefixLeaf; + case TrieRecord::StorageKind::StandaloneLeaf0: + return FilePrefixLeaf0; + } + } + + enum Limits : int64_t { + /// Saves files bigger than 64KB standalone instead of embedding them. + MaxEmbeddedSize = 64LL * 1024LL - 1, + }; + + struct Data { + StorageKind SK = StorageKind::Unknown; + FileOffset Offset; + }; + + /// Pack StorageKind and Offset from Data into 8 byte TrieRecord. + static uint64_t pack(Data D) { + assert(D.Offset.get() < (int64_t)(1ULL << 56)); + uint64_t Packed = uint64_t(D.SK) << 56 | D.Offset.get(); + assert(D.SK != StorageKind::Unknown || Packed == 0); +#ifndef NDEBUG + Data RoundTrip = unpack(Packed); + assert(D.SK == RoundTrip.SK); + assert(D.Offset.get() == RoundTrip.Offset.get()); +#endif + return Packed; + } + + // Unpack TrieRecord into Data. + static Data unpack(uint64_t Packed) { + Data D; + if (!Packed) + return D; + D.SK = (StorageKind)(Packed >> 56); + D.Offset = FileOffset(Packed & (UINT64_MAX >> 8)); + return D; + } + + TrieRecord() : Storage(0) {} + + Data load() const { return unpack(Storage); } + bool compare_exchange_strong(Data &Existing, Data New); + +private: + std::atomic<uint64_t> Storage; +}; + +/// DataStore record data: 4B + size? + refs? + data + 0 +/// - 4-bytes: Header +/// - {0,4,8}-bytes: DataSize (may be packed in Header) +/// - {0,4,8}-bytes: NumRefs (may be packed in Header) +/// - NumRefs*{4,8}-bytes: Refs[] (end-ptr is 8-byte aligned) +/// - <data> +/// - 1-byte: 0-term +struct DataRecordHandle { + /// NumRefs storage: 4B, 2B, 1B, or 0B (no refs). Or, 8B, for alignment + /// convenience to avoid computing padding later. + enum class NumRefsFlags : uint8_t { + Uses0B = 0U, + Uses1B = 1U, + Uses2B = 2U, + Uses4B = 3U, + Uses8B = 4U, + Max = Uses8B, + }; + + /// DataSize storage: 8B, 4B, 2B, or 1B. + enum class DataSizeFlags { + Uses1B = 0U, + Uses2B = 1U, + Uses4B = 2U, + Uses8B = 3U, + Max = Uses8B, + }; + + /// Kind of ref stored in Refs[]: InternalRef or InternalRef4B. + enum class RefKindFlags { + InternalRef = 0U, + InternalRef4B = 1U, + Max = InternalRef4B, + }; + + enum Counts : int { + NumRefsShift = 0, + NumRefsBits = 3, + DataSizeShift = NumRefsShift + NumRefsBits, + DataSizeBits = 2, + RefKindShift = DataSizeShift + DataSizeBits, + RefKindBits = 1, + }; + static_assert(((UINT32_MAX << NumRefsBits) & (uint32_t)NumRefsFlags::Max) == + 0, + "Not enough bits"); + static_assert(((UINT32_MAX << DataSizeBits) & (uint32_t)DataSizeFlags::Max) == + 0, + "Not enough bits"); + static_assert(((UINT32_MAX << RefKindBits) & (uint32_t)RefKindFlags::Max) == + 0, + "Not enough bits"); + + /// Layout of the DataRecordHandle and how to decode it. + struct LayoutFlags { + NumRefsFlags NumRefs; + DataSizeFlags DataSize; + RefKindFlags RefKind; + + static uint64_t pack(LayoutFlags LF) { + unsigned Packed = ((unsigned)LF.NumRefs << NumRefsShift) | + ((unsigned)LF.DataSize << DataSizeShift) | + ((unsigned)LF.RefKind << RefKindShift); +#ifndef NDEBUG + LayoutFlags RoundTrip = unpack(Packed); + assert(LF.NumRefs == RoundTrip.NumRefs); + assert(LF.DataSize == RoundTrip.DataSize); + assert(LF.RefKind == RoundTrip.RefKind); +#endif + return Packed; + } + static LayoutFlags unpack(uint64_t Storage) { + assert(Storage <= UINT8_MAX && "Expect storage to fit in a byte"); + LayoutFlags LF; + LF.NumRefs = + (NumRefsFlags)((Storage >> NumRefsShift) & ((1U << NumRefsBits) - 1)); + LF.DataSize = (DataSizeFlags)((Storage >> DataSizeShift) & + ((1U << DataSizeBits) - 1)); + LF.RefKind = + (RefKindFlags)((Storage >> RefKindShift) & ((1U << RefKindBits) - 1)); + return LF; + } + }; + + /// Header layout: + /// - 1-byte: LayoutFlags + /// - 1-byte: 1B size field + /// - {0,2}-bytes: 2B size field + struct Header { + using PackTy = uint32_t; + PackTy Packed; + + static constexpr unsigned LayoutFlagsShift = + (sizeof(PackTy) - 1) * CHAR_BIT; + }; + + struct Input { + InternalRefArrayRef Refs; + ArrayRef<char> Data; + }; + + LayoutFlags getLayoutFlags() const { + return LayoutFlags::unpack(H->Packed >> Header::LayoutFlagsShift); + } + + uint64_t getDataSize() const; + void skipDataSize(LayoutFlags LF, int64_t &RelOffset) const; + uint32_t getNumRefs() const; + void skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const; + int64_t getRefsRelOffset() const; + int64_t getDataRelOffset() const; + + static uint64_t getTotalSize(uint64_t DataRelOffset, uint64_t DataSize) { + return DataRelOffset + DataSize + 1; + } + uint64_t getTotalSize() const { + return getDataRelOffset() + getDataSize() + 1; + } + + /// Describe the layout of data stored and how to decode from + /// DataRecordHandle. + struct Layout { + explicit Layout(const Input &I); + + LayoutFlags Flags; + uint64_t DataSize = 0; + uint32_t NumRefs = 0; + int64_t RefsRelOffset = 0; + int64_t DataRelOffset = 0; + uint64_t getTotalSize() const { + return DataRecordHandle::getTotalSize(DataRelOffset, DataSize); + } + }; + + InternalRefArrayRef getRefs() const { + assert(H && "Expected valid handle"); + auto *BeginByte = reinterpret_cast<const char *>(H) + getRefsRelOffset(); + size_t Size = getNumRefs(); + if (!Size) + return InternalRefArrayRef(); + if (getLayoutFlags().RefKind == RefKindFlags::InternalRef4B) + return ArrayRef(reinterpret_cast<const InternalRef4B *>(BeginByte), Size); + return ArrayRef(reinterpret_cast<const InternalRef *>(BeginByte), Size); + } + + ArrayRef<char> getData() const { + assert(H && "Expected valid handle"); + return ArrayRef(reinterpret_cast<const char *>(H) + getDataRelOffset(), + getDataSize()); + } + + static DataRecordHandle create(function_ref<char *(size_t Size)> Alloc, + const Input &I); + static Expected<DataRecordHandle> + createWithError(function_ref<Expected<char *>(size_t Size)> Alloc, + const Input &I); + static DataRecordHandle construct(char *Mem, const Input &I); + + static DataRecordHandle get(const char *Mem) { + return DataRecordHandle( + *reinterpret_cast<const DataRecordHandle::Header *>(Mem)); + } + static Expected<DataRecordHandle> + getFromDataPool(const OnDiskDataAllocator &Pool, FileOffset Offset); + + explicit operator bool() const { return H; } + const Header &getHeader() const { return *H; } + + DataRecordHandle() = default; + explicit DataRecordHandle(const Header &H) : H(&H) {} + +private: + static DataRecordHandle constructImpl(char *Mem, const Input &I, + const Layout &L); + const Header *H = nullptr; +}; + +/// Proxy for any on-disk object or raw data. +struct OnDiskContent { + std::optional<DataRecordHandle> Record; + std::optional<ArrayRef<char>> Bytes; +}; + +/// Data loaded inside the memory from standalone file. +class StandaloneDataInMemory { +public: + OnDiskContent getContent() const; + + StandaloneDataInMemory(std::unique_ptr<sys::fs::mapped_file_region> Region, + TrieRecord::StorageKind SK) + : Region(std::move(Region)), SK(SK) { +#ifndef NDEBUG + bool IsStandalone = false; + switch (SK) { + case TrieRecord::StorageKind::Standalone: + case TrieRecord::StorageKind::StandaloneLeaf: + case TrieRecord::StorageKind::StandaloneLeaf0: + IsStandalone = true; + break; + default: + break; + } + assert(IsStandalone); +#endif + } + +private: + std::unique_ptr<sys::fs::mapped_file_region> Region; + TrieRecord::StorageKind SK; +}; + +/// Container to lookup loaded standalone objects. +template <size_t NumShards> class StandaloneDataMap { + static_assert(isPowerOf2_64(NumShards), "Expected power of 2"); + +public: + uintptr_t insert(ArrayRef<uint8_t> Hash, TrieRecord::StorageKind SK, + std::unique_ptr<sys::fs::mapped_file_region> Region); + + const StandaloneDataInMemory *lookup(ArrayRef<uint8_t> Hash) const; + bool count(ArrayRef<uint8_t> Hash) const { return bool(lookup(Hash)); } + +private: + struct Shard { + /// Needs to store a std::unique_ptr for a stable address identity. + DenseMap<const uint8_t *, std::unique_ptr<StandaloneDataInMemory>> Map; + mutable std::mutex Mutex; + }; + Shard &getShard(ArrayRef<uint8_t> Hash) { + return const_cast<Shard &>( + const_cast<const StandaloneDataMap *>(this)->getShard(Hash)); + } + const Shard &getShard(ArrayRef<uint8_t> Hash) const { + static_assert(NumShards <= 256, "Expected only 8 bits of shard"); + return Shards[Hash[0] % NumShards]; + } + + Shard Shards[NumShards]; +}; + +using StandaloneDataMapTy = StandaloneDataMap<16>; + +/// A vector of internal node references. +class InternalRefVector { +public: + void push_back(InternalRef Ref) { + if (NeedsFull) + return FullRefs.push_back(Ref); + if (std::optional<InternalRef4B> Small = InternalRef4B::tryToShrink(Ref)) + return SmallRefs.push_back(*Small); + NeedsFull = true; + assert(FullRefs.empty()); + FullRefs.reserve(SmallRefs.size() + 1); + for (InternalRef4B Small : SmallRefs) + FullRefs.push_back(Small); + FullRefs.push_back(Ref); + SmallRefs.clear(); + } + + operator InternalRefArrayRef() const { + assert(SmallRefs.empty() || FullRefs.empty()); + return NeedsFull ? InternalRefArrayRef(FullRefs) + : InternalRefArrayRef(SmallRefs); + } + +private: + bool NeedsFull = false; + SmallVector<InternalRef4B> SmallRefs; + SmallVector<InternalRef> FullRefs; +}; + +} // namespace + +Expected<DataRecordHandle> DataRecordHandle::createWithError( + function_ref<Expected<char *>(size_t Size)> Alloc, const Input &I) { + Layout L(I); + if (Expected<char *> Mem = Alloc(L.getTotalSize())) + return constructImpl(*Mem, I, L); + else + return Mem.takeError(); +} + +DataRecordHandle +DataRecordHandle::create(function_ref<char *(size_t Size)> Alloc, + const Input &I) { + Layout L(I); + return constructImpl(Alloc(L.getTotalSize()), I, L); +} + +ObjectHandle ObjectHandle::fromFileOffset(FileOffset Offset) { + // Store the file offset as it is. + assert(!(Offset.get() & 0x1)); + return ObjectHandle(Offset.get()); +} + +ObjectHandle ObjectHandle::fromMemory(uintptr_t Ptr) { + // Store the pointer from memory with lowest bit set. + assert(!(Ptr & 0x1)); + return ObjectHandle(Ptr | 1); +} + +/// Proxy for an on-disk index record. +struct OnDiskGraphDB::IndexProxy { + FileOffset Offset; + ArrayRef<uint8_t> Hash; + TrieRecord &Ref; +}; + +template <size_t N> +uintptr_t StandaloneDataMap<N>::insert( + ArrayRef<uint8_t> Hash, TrieRecord::StorageKind SK, + std::unique_ptr<sys::fs::mapped_file_region> Region) { + auto &S = getShard(Hash); + std::lock_guard<std::mutex> Lock(S.Mutex); + auto &V = S.Map[Hash.data()]; + if (!V) + V = std::make_unique<StandaloneDataInMemory>(std::move(Region), SK); + return reinterpret_cast<uintptr_t>(V.get()); +} + +template <size_t N> +const StandaloneDataInMemory * +StandaloneDataMap<N>::lookup(ArrayRef<uint8_t> Hash) const { + auto &S = getShard(Hash); + std::lock_guard<std::mutex> Lock(S.Mutex); + auto I = S.Map.find(Hash.data()); + if (I == S.Map.end()) + return nullptr; + return &*I->second; +} + +namespace { + +/// Copy of \a sys::fs::TempFile that skips RemoveOnSignal, which is too +/// expensive to register/unregister at this rate. +/// +/// FIXME: Add a TempFileManager that maintains a thread-safe list of open temp +/// files and has a signal handler registerd that removes them all. +class TempFile { + bool Done = false; + TempFile(StringRef Name, int FD) : TmpName(std::string(Name)), FD(FD) {} + +public: + /// This creates a temporary file with createUniqueFile. + static Expected<TempFile> create(const Twine &Model); + TempFile(TempFile &&Other) { *this = std::move(Other); } + TempFile &operator=(TempFile &&Other) { + TmpName = std::move(Other.TmpName); + FD = Other.FD; + Other.Done = true; + Other.FD = -1; + return *this; + } + + // Name of the temporary file. + std::string TmpName; + + // The open file descriptor. + int FD = -1; + + // Keep this with the given name. + Error keep(const Twine &Name); + Error discard(); + + // This checks that keep or delete was called. + ~TempFile() { consumeError(discard()); } +}; + +class MappedTempFile { +public: + char *data() const { return Map.data(); } + size_t size() const { return Map.size(); } + + Error discard() { + assert(Map && "Map already destroyed"); + Map.unmap(); + return Temp.discard(); + } + + Error keep(const Twine &Name) { + assert(Map && "Map already destroyed"); + Map.unmap(); + return Temp.keep(Name); + } + + MappedTempFile(TempFile Temp, sys::fs::mapped_file_region Map) + : Temp(std::move(Temp)), Map(std::move(Map)) {} + +private: + TempFile Temp; + sys::fs::mapped_file_region Map; +}; +} // namespace + +Error TempFile::discard() { + Done = true; + if (FD != -1) { + sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD); + if (std::error_code EC = sys::fs::closeFile(File)) + return errorCodeToError(EC); + } + FD = -1; + + // Always try to close and remove. + std::error_code RemoveEC; + if (!TmpName.empty()) { + std::error_code EC = sys::fs::remove(TmpName); + if (EC) + return errorCodeToError(EC); + } + TmpName = ""; + + return Error::success(); +} + +Error TempFile::keep(const Twine &Name) { + assert(!Done); + Done = true; + // Always try to close and rename. + std::error_code RenameEC = sys::fs::rename(TmpName, Name); + + if (!RenameEC) + TmpName = ""; + + sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD); + if (std::error_code EC = sys::fs::closeFile(File)) + return errorCodeToError(EC); + FD = -1; + + return errorCodeToError(RenameEC); +} + +Expected<TempFile> TempFile::create(const Twine &Model) { + int FD; + SmallString<128> ResultPath; + if (std::error_code EC = sys::fs::createUniqueFile(Model, FD, ResultPath)) + return errorCodeToError(EC); + + TempFile Ret(ResultPath, FD); + return std::move(Ret); +} + +bool TrieRecord::compare_exchange_strong(Data &Existing, Data New) { + uint64_t ExistingPacked = pack(Existing); + uint64_t NewPacked = pack(New); + if (Storage.compare_exchange_strong(ExistingPacked, NewPacked)) + return true; + Existing = unpack(ExistingPacked); + return false; +} + +DataRecordHandle DataRecordHandle::construct(char *Mem, const Input &I) { + return constructImpl(Mem, I, Layout(I)); +} + +Expected<DataRecordHandle> +DataRecordHandle::getFromDataPool(const OnDiskDataAllocator &Pool, + FileOffset Offset) { + auto HeaderData = Pool.get(Offset, sizeof(DataRecordHandle::Header)); + if (!HeaderData) + return HeaderData.takeError(); + + auto Record = DataRecordHandle::get(HeaderData->data()); + if (Record.getTotalSize() + Offset.get() > Pool.size()) + return createStringError( + make_error_code(std::errc::illegal_byte_sequence), + "data record span passed the end of the data pool"); + + return Record; +} + +DataRecordHandle DataRecordHandle::constructImpl(char *Mem, const Input &I, + const Layout &L) { + char *Next = Mem + sizeof(Header); + + // Fill in Packed and set other data, then come back to construct the header. + Header::PackTy Packed = 0; + Packed |= LayoutFlags::pack(L.Flags) << Header::LayoutFlagsShift; + + // Construct DataSize. + switch (L.Flags.DataSize) { + case DataSizeFlags::Uses1B: + assert(I.Data.size() <= UINT8_MAX); + Packed |= (Header::PackTy)I.Data.size() + << ((sizeof(Packed) - 2) * CHAR_BIT); + break; + case DataSizeFlags::Uses2B: + assert(I.Data.size() <= UINT16_MAX); + Packed |= (Header::PackTy)I.Data.size() + << ((sizeof(Packed) - 4) * CHAR_BIT); + break; + case DataSizeFlags::Uses4B: + support::endian::write32le(Next, I.Data.size()); + Next += 4; + break; + case DataSizeFlags::Uses8B: + support::endian::write64le(Next, I.Data.size()); + Next += 8; + break; + } + + // Construct NumRefs. + // + // NOTE: May be writing NumRefs even if there are zero refs in order to fix + // alignment. + switch (L.Flags.NumRefs) { + case NumRefsFlags::Uses0B: + break; + case NumRefsFlags::Uses1B: + assert(I.Refs.size() <= UINT8_MAX); + Packed |= (Header::PackTy)I.Refs.size() + << ((sizeof(Packed) - 2) * CHAR_BIT); + break; + case NumRefsFlags::Uses2B: + assert(I.Refs.size() <= UINT16_MAX); + Packed |= (Header::PackTy)I.Refs.size() + << ((sizeof(Packed) - 4) * CHAR_BIT); + break; + case NumRefsFlags::Uses4B: + support::endian::write32le(Next, I.Refs.size()); + Next += 4; + break; + case NumRefsFlags::Uses8B: + support::endian::write64le(Next, I.Refs.size()); + Next += 8; + break; + } + + // Construct Refs[]. + if (!I.Refs.empty()) { + assert((L.Flags.RefKind == RefKindFlags::InternalRef4B) == I.Refs.is4B()); + ArrayRef<uint8_t> RefsBuffer = I.Refs.getBuffer(); + llvm::copy(RefsBuffer, Next); + Next += RefsBuffer.size(); + } + + // Construct Data and the trailing null. + assert(isAddrAligned(Align(8), Next)); + llvm::copy(I.Data, Next); + Next[I.Data.size()] = 0; + + // Construct the header itself and return. + Header *H = new (Mem) Header{Packed}; + DataRecordHandle Record(*H); + assert(Record.getData() == I.Data); + assert(Record.getNumRefs() == I.Refs.size()); + assert(Record.getRefs() == I.Refs); + assert(Record.getLayoutFlags().DataSize == L.Flags.DataSize); + assert(Record.getLayoutFlags().NumRefs == L.Flags.NumRefs); + assert(Record.getLayoutFlags().RefKind == L.Flags.RefKind); + return Record; +} + +DataRecordHandle::Layout::Layout(const Input &I) { + // Start initial relative offsets right after the Header. + uint64_t RelOffset = sizeof(Header); + + // Initialize the easy stuff. + DataSize = I.Data.size(); + NumRefs = I.Refs.size(); + + // Check refs size. + Flags.RefKind = + I.Refs.is4B() ? RefKindFlags::InternalRef4B : RefKindFlags::InternalRef; + + // Find the smallest slot available for DataSize. + bool Has1B = true; + bool Has2B = true; + if (DataSize <= UINT8_MAX && Has1B) { + Flags.DataSize = DataSizeFlags::Uses1B; + Has1B = false; + } else if (DataSize <= UINT16_MAX && Has2B) { + Flags.DataSize = DataSizeFlags::Uses2B; + Has2B = false; + } else if (DataSize <= UINT32_MAX) { + Flags.DataSize = DataSizeFlags::Uses4B; + RelOffset += 4; + } else { + Flags.DataSize = DataSizeFlags::Uses8B; + RelOffset += 8; + } + + // Find the smallest slot available for NumRefs. Never sets NumRefs8B here. + if (!NumRefs) { + Flags.NumRefs = NumRefsFlags::Uses0B; + } else if (NumRefs <= UINT8_MAX && Has1B) { + Flags.NumRefs = NumRefsFlags::Uses1B; + Has1B = false; + } else if (NumRefs <= UINT16_MAX && Has2B) { + Flags.NumRefs = NumRefsFlags::Uses2B; + Has2B = false; + } else { + Flags.NumRefs = NumRefsFlags::Uses4B; + RelOffset += 4; + } + + // Helper to "upgrade" either DataSize or NumRefs by 4B to avoid complicated + // padding rules when reading and writing. This also bumps RelOffset. + // + // The value for NumRefs is strictly limited to UINT32_MAX, but it can be + // stored as 8B. This means we can *always* find a size to grow. + // + // NOTE: Only call this once. + auto GrowSizeFieldsBy4B = [&]() { + assert(isAligned(Align(4), RelOffset)); + RelOffset += 4; + + assert(Flags.NumRefs != NumRefsFlags::Uses8B && + "Expected to be able to grow NumRefs8B"); + + // First try to grow DataSize. NumRefs will not (yet) be 8B, and if + // DataSize is upgraded to 8B it'll already be aligned. + // + // Failing that, grow NumRefs. + if (Flags.DataSize < DataSizeFlags::Uses4B) + Flags.DataSize = DataSizeFlags::Uses4B; // DataSize: Packed => 4B. + else if (Flags.DataSize < DataSizeFlags::Uses8B) + Flags.DataSize = DataSizeFlags::Uses8B; // DataSize: 4B => 8B. + else if (Flags.NumRefs < NumRefsFlags::Uses4B) + Flags.NumRefs = NumRefsFlags::Uses4B; // NumRefs: Packed => 4B. + else + Flags.NumRefs = NumRefsFlags::Uses8B; // NumRefs: 4B => 8B. + }; + + assert(isAligned(Align(4), RelOffset)); + if (Flags.RefKind == RefKindFlags::InternalRef) { + // List of 8B refs should be 8B-aligned. Grow one of the sizes to get this + // without padding. + if (!isAligned(Align(8), RelOffset)) + GrowSizeFieldsBy4B(); + + assert(isAligned(Align(8), RelOffset)); + RefsRelOffset = RelOffset; + RelOffset += 8 * NumRefs; + } else { + // The array of 4B refs doesn't need 8B alignment, but the data will need + // to be 8B-aligned. Detect this now, and, if necessary, shift everything + // by 4B by growing one of the sizes. + // If we remove the need for 8B-alignment for data there is <1% savings in + // disk storage for a clang build using MCCAS but the 8B-alignment may be + // useful in the future so keep it for now. + uint64_t RefListSize = 4 * NumRefs; + if (!isAligned(Align(8), RelOffset + RefListSize)) + GrowSizeFieldsBy4B(); + RefsRelOffset = RelOffset; + RelOffset += RefListSize; + } + + assert(isAligned(Align(8), RelOffset)); + DataRelOffset = RelOffset; +} + +uint64_t DataRecordHandle::getDataSize() const { + int64_t RelOffset = sizeof(Header); + auto *DataSizePtr = reinterpret_cast<const char *>(H) + RelOffset; + switch (getLayoutFlags().DataSize) { + case DataSizeFlags::Uses1B: + return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX; + case DataSizeFlags::Uses2B: + return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) & + UINT16_MAX; + case DataSizeFlags::Uses4B: + return support::endian::read32le(DataSizePtr); + case DataSizeFlags::Uses8B: + return support::endian::read64le(DataSizePtr); + } +} + +void DataRecordHandle::skipDataSize(LayoutFlags LF, int64_t &RelOffset) const { + if (LF.DataSize >= DataSizeFlags::Uses4B) + RelOffset += 4; + if (LF.DataSize >= DataSizeFlags::Uses8B) + RelOffset += 4; +} + +uint32_t DataRecordHandle::getNumRefs() const { + LayoutFlags LF = getLayoutFlags(); + int64_t RelOffset = sizeof(Header); + skipDataSize(LF, RelOffset); + auto *NumRefsPtr = reinterpret_cast<const char *>(H) + RelOffset; + switch (LF.NumRefs) { + case NumRefsFlags::Uses0B: + return 0; + case NumRefsFlags::Uses1B: + return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX; + case NumRefsFlags::Uses2B: + return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) & + UINT16_MAX; + case NumRefsFlags::Uses4B: + return support::endian::read32le(NumRefsPtr); + case NumRefsFlags::Uses8B: + return support::endian::read64le(NumRefsPtr); + } +} + +void DataRecordHandle::skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const { + if (LF.NumRefs >= NumRefsFlags::Uses4B) + RelOffset += 4; + if (LF.NumRefs >= NumRefsFlags::Uses8B) + RelOffset += 4; +} + +int64_t DataRecordHandle::getRefsRelOffset() const { + LayoutFlags LF = getLayoutFlags(); + int64_t RelOffset = sizeof(Header); + skipDataSize(LF, RelOffset); + skipNumRefs(LF, RelOffset); + return RelOffset; +} + +int64_t DataRecordHandle::getDataRelOffset() const { + LayoutFlags LF = getLayoutFlags(); + int64_t RelOffset = sizeof(Header); + skipDataSize(LF, RelOffset); + skipNumRefs(LF, RelOffset); + uint32_t RefSize = LF.RefKind == RefKindFlags::InternalRef4B ? 4 : 8; + RelOffset += RefSize * getNumRefs(); + return RelOffset; +} + +Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { + return Index.validate([&](FileOffset Offset, + OnDiskTrieRawHashMap::ConstValueProxy Record) + -> Error { + auto formatError = [&](Twine Msg) { + return createStringError( + llvm::errc::illegal_byte_sequence, + "bad record at 0x" + + utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " + + Msg.str()); + }; + + if (Record.Data.size() != sizeof(TrieRecord)) + return formatError("wrong data record size"); + if (!isAligned(Align::Of<TrieRecord>(), Record.Data.size())) + return formatError("wrong data record alignment"); + + auto *R = reinterpret_cast<const TrieRecord *>(Record.Data.data()); + TrieRecord::Data D = R->load(); + std::unique_ptr<MemoryBuffer> FileBuffer; + if ((uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Unknown && + (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::DataPool && + (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Standalone && + (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf && + (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf0) + return formatError("invalid record kind value"); + + auto Ref = InternalRef::getFromOffset(Offset); + auto I = getIndexProxyFromRef(Ref); + if (!I) + return I.takeError(); + + switch (D.SK) { + case TrieRecord::StorageKind::Unknown: + // This could be an abandoned entry due to a termination before updating + // the record. It can be reused by later insertion so just skip this entry + // for now. + return Error::success(); + case TrieRecord::StorageKind::DataPool: + // Check offset is a postive value, and large enough to hold the + // header for the data record. + if (D.Offset.get() <= 0 || + (uint64_t)D.Offset.get() + sizeof(DataRecordHandle::Header) >= + DataPool.size()) + return formatError("datapool record out of bound"); + break; + case TrieRecord::StorageKind::Standalone: + case TrieRecord::StorageKind::StandaloneLeaf: + case TrieRecord::StorageKind::StandaloneLeaf0: + SmallString<256> Path; + getStandalonePath(TrieRecord::getStandaloneFilePrefix(D.SK), *I, Path); + // If need to validate the content of the file later, just load the + // buffer here. Otherwise, just check the existance of the file. + if (Deep) { + auto File = MemoryBuffer::getFile(Path, /*IsText=*/false, + /*RequiresNullTerminator=*/false); + if (!File || !*File) + return formatError("record file \'" + Path + "\' does not exist"); + + FileBuffer = std::move(*File); + } else if (!llvm::sys::fs::exists(Path)) + return formatError("record file \'" + Path + "\' does not exist"); + } + + if (!Deep) + return Error::success(); + + auto dataError = [&](Twine Msg) { + return createStringError(llvm::errc::illegal_byte_sequence, + "bad data for digest \'" + toHex(I->Hash) + + "\': " + Msg.str()); + }; + SmallVector<ArrayRef<uint8_t>> Refs; + ArrayRef<char> StoredData; + + switch (D.SK) { + case TrieRecord::StorageKind::Unknown: + llvm_unreachable("already handled"); + case TrieRecord::StorageKind::DataPool: { + auto DataRecord = DataRecordHandle::getFromDataPool(DataPool, D.Offset); + if (!DataRecord) + return dataError(toString(DataRecord.takeError())); + + for (auto InternRef : DataRecord->getRefs()) { + auto Index = getIndexProxyFromRef(InternRef); + if (!Index) + return Index.takeError(); + Refs.push_back(Index->Hash); + } + StoredData = DataRecord->getData(); + break; + } + case TrieRecord::StorageKind::Standalone: { + if (FileBuffer->getBufferSize() < sizeof(DataRecordHandle::Header)) + return dataError("data record is not big enough to read the header"); + auto DataRecord = DataRecordHandle::get(FileBuffer->getBufferStart()); + if (DataRecord.getTotalSize() < FileBuffer->getBufferSize()) + return dataError( + "data record span passed the end of the standalone file"); + for (auto InternRef : DataRecord.getRefs()) { + auto Index = getIndexProxyFromRef(InternRef); + if (!Index) + return Index.takeError(); + Refs.push_back(Index->Hash); + } + StoredData = DataRecord.getData(); + break; + } + case TrieRecord::StorageKind::StandaloneLeaf: + case TrieRecord::StorageKind::StandaloneLeaf0: { + StoredData = arrayRefFromStringRef<char>(FileBuffer->getBuffer()); + if (D.SK == TrieRecord::StorageKind::StandaloneLeaf0) { + if (!FileBuffer->getBuffer().ends_with('\0')) + return dataError("standalone file is not zero terminated"); + StoredData = StoredData.drop_back(1); + } + break; + } + } + + SmallVector<uint8_t> ComputedHash; + Hasher(Refs, StoredData, ComputedHash); + if (I->Hash != ArrayRef(ComputedHash)) + return dataError("hash mismatch, got \'" + toHex(ComputedHash) + + "\' instead"); + + return Error::success(); + }); +} + +void OnDiskGraphDB::print(raw_ostream &OS) const { + OS << "on-disk-root-path: " << RootPath << "\n"; + + struct PoolInfo { + uint64_t Offset; + }; + SmallVector<PoolInfo> Pool; + + OS << "\n"; + OS << "index:\n"; + Index.print(OS, [&](ArrayRef<char> Data) { + assert(Data.size() == sizeof(TrieRecord)); + assert(isAligned(Align::Of<TrieRecord>(), Data.size())); + auto *R = reinterpret_cast<const TrieRecord *>(Data.data()); + TrieRecord::Data D = R->load(); + OS << " SK="; + switch (D.SK) { + case TrieRecord::StorageKind::Unknown: + OS << "unknown "; + break; + case TrieRecord::StorageKind::DataPool: + OS << "datapool "; + Pool.push_back({D.Offset.get()}); + break; + case TrieRecord::StorageKind::Standalone: + OS << "standalone-data "; + break; + case TrieRecord::StorageKind::StandaloneLeaf: + OS << "standalone-leaf "; + break; + case TrieRecord::StorageKind::StandaloneLeaf0: + OS << "standalone-leaf+0"; + break; + } + OS << " Offset=" << (void *)D.Offset.get(); + }); + if (Pool.empty()) + return; + + OS << "\n"; + OS << "pool:\n"; + llvm::sort( + Pool, [](PoolInfo LHS, PoolInfo RHS) { return LHS.Offset < RHS.Offset; }); + for (PoolInfo PI : Pool) { + OS << "- addr=" << (void *)PI.Offset << " "; + auto D = DataRecordHandle::getFromDataPool(DataPool, FileOffset(PI.Offset)); + if (!D) { + OS << "error: " << toString(D.takeError()); + return; + } + + OS << "record refs=" << D->getNumRefs() << " data=" << D->getDataSize() + << " size=" << D->getTotalSize() + << " end=" << (void *)(PI.Offset + D->getTotalSize()) << "\n"; + } +} + +Expected<OnDiskGraphDB::IndexProxy> +OnDiskGraphDB::indexHash(ArrayRef<uint8_t> Hash) { + auto P = Index.insertLazy( + Hash, [](FileOffset TentativeOffset, + OnDiskTrieRawHashMap::ValueProxy TentativeValue) { + assert(TentativeValue.Data.size() == sizeof(TrieRecord)); + assert( + isAddrAligned(Align::Of<TrieRecord>(), TentativeValue.Data.data())); + new (TentativeValue.Data.data()) TrieRecord(); + }); + if (LLVM_UNLIKELY(!P)) + return P.takeError(); + + assert(*P && "Expected insertion"); + return getIndexProxyFromPointer(*P); +} + +OnDiskGraphDB::IndexProxy OnDiskGraphDB::getIndexProxyFromPointer( + OnDiskTrieRawHashMap::ConstOnDiskPtr P) const { + assert(P); + assert(P.getOffset()); + return IndexProxy{P.getOffset(), P->Hash, + *const_cast<TrieRecord *>( + reinterpret_cast<const TrieRecord *>(P->Data.data()))}; +} + +Expected<ObjectID> OnDiskGraphDB::getReference(ArrayRef<uint8_t> Hash) { + auto I = indexHash(Hash); + if (LLVM_UNLIKELY(!I)) + return I.takeError(); + return getExternalReference(*I); +} + +ObjectID OnDiskGraphDB::getExternalReference(const IndexProxy &I) { + return getExternalReference(makeInternalRef(I.Offset)); +} + +std::optional<ObjectID> +OnDiskGraphDB::getExistingReference(ArrayRef<uint8_t> Digest) { + auto tryUpstream = + [&](std::optional<IndexProxy> I) -> std::optional<ObjectID> { + if (!UpstreamDB) + return std::nullopt; + std::optional<ObjectID> UpstreamID = + UpstreamDB->getExistingReference(Digest); + if (LLVM_UNLIKELY(!UpstreamID)) + return std::nullopt; + auto Ref = expectedToOptional(indexHash(Digest)); + if (!Ref) + return std::nullopt; + if (!I) + I.emplace(*Ref); + return getExternalReference(*I); + }; + + OnDiskTrieRawHashMap::ConstOnDiskPtr P = Index.find(Digest); + if (!P) + return tryUpstream(std::nullopt); + IndexProxy I = getIndexProxyFromPointer(P); + TrieRecord::Data Obj = I.Ref.load(); + if (Obj.SK == TrieRecord::StorageKind::Unknown) + return tryUpstream(I); + return getExternalReference(makeInternalRef(I.Offset)); +} + +Expected<OnDiskGraphDB::IndexProxy> +OnDiskGraphDB::getIndexProxyFromRef(InternalRef Ref) const { + auto P = Index.recoverFromFileOffset(Ref.getFileOffset()); + if (LLVM_UNLIKELY(!P)) + return P.takeError(); + return getIndexProxyFromPointer(*P); +} + +Expected<ArrayRef<uint8_t>> OnDiskGraphDB::getDigest(InternalRef Ref) const { + auto I = getIndexProxyFromRef(Ref); + if (!I) + return I.takeError(); + return I->Hash; +} + +ArrayRef<uint8_t> OnDiskGraphDB::getDigest(const IndexProxy &I) const { + return I.Hash; +} + +static OnDiskContent getContentFromHandle(const OnDiskDataAllocator &DataPool, + ObjectHandle OH) { + // Decode ObjectHandle to locate the stored content. + uint64_t Data = OH.getOpaqueData(); + if (Data & 1) { + const auto *SDIM = + reinterpret_cast<const StandaloneDataInMemory *>(Data & (-1ULL << 1)); + return SDIM->getContent(); + } + + auto DataHandle = + cantFail(DataRecordHandle::getFromDataPool(DataPool, FileOffset(Data))); + assert(DataHandle.getData().end()[0] == 0 && "Null termination"); + return OnDiskContent{DataHandle, std::nullopt}; +} + +ArrayRef<char> OnDiskGraphDB::getObjectData(ObjectHandle Node) const { + OnDiskContent Content = getContentFromHandle(DataPool, Node); + if (Content.Bytes) + return *Content.Bytes; + assert(Content.Record && "Expected record or bytes"); + return Content.Record->getData(); +} + +InternalRefArrayRef OnDiskGraphDB::getInternalRefs(ObjectHandle Node) const { + if (std::optional<DataRecordHandle> Record = + getContentFromHandle(DataPool, Node).Record) + return Record->getRefs(); + return std::nullopt; +} + +Expected<std::optional<ObjectHandle>> +OnDiskGraphDB::load(ObjectID ExternalRef) { + InternalRef Ref = getInternalRef(ExternalRef); + auto I = getIndexProxyFromRef(Ref); + if (!I) + return I.takeError(); + TrieRecord::Data Object = I->Ref.load(); + + if (Object.SK == TrieRecord::StorageKind::Unknown) { + if (!UpstreamDB) + return std::nullopt; + return faultInFromUpstream(ExternalRef); + } + + if (Object.SK == TrieRecord::StorageKind::DataPool) + return ObjectHandle::fromFileOffset(Object.Offset); + + // Only TrieRecord::StorageKind::Standalone (and variants) need to be + // explicitly loaded. + // + // There's corruption if standalone objects have offsets, or if we get here + // for something that isn't standalone. + if (Object.Offset) + return createCorruptObjectError(getDigest(*I)); + switch (Object.SK) { + case TrieRecord::StorageKind::Unknown: + case TrieRecord::StorageKind::DataPool: + llvm_unreachable("unexpected storage kind"); + case TrieRecord::StorageKind::Standalone: + case TrieRecord::StorageKind::StandaloneLeaf0: + case TrieRecord::StorageKind::StandaloneLeaf: + break; + } + + // Load it from disk. + // + // Note: Creation logic guarantees that data that needs null-termination is + // suitably 0-padded. Requiring null-termination here would be too expensive + // for extremely large objects that happen to be page-aligned. + SmallString<256> Path; + getStandalonePath(TrieRecord::getStandaloneFilePrefix(Object.SK), *I, Path); + + auto File = sys::fs::openNativeFileForRead(Path); + if (!File) + return createFileError(Path, File.takeError()); + + auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(*File); }); + + sys::fs::file_status Status; + if (std::error_code EC = sys::fs::status(*File, Status)) + return createCorruptObjectError(getDigest(*I)); + + std::error_code EC; + auto Region = std::make_unique<sys::fs::mapped_file_region>( + *File, sys::fs::mapped_file_region::readonly, Status.getSize(), 0, EC); + if (EC) + return createCorruptObjectError(getDigest(*I)); + + return ObjectHandle::fromMemory( + static_cast<StandaloneDataMapTy *>(StandaloneData) + ->insert(I->Hash, Object.SK, std::move(Region))); +} + +Expected<bool> OnDiskGraphDB::isMaterialized(ObjectID Ref) { + auto Presence = getObjectPresence(Ref, /*CheckUpstream=*/true); + if (!Presence) + return Presence.takeError(); + + switch (*Presence) { + case ObjectPresence::Missing: + return false; + case ObjectPresence::InPrimaryDB: + return true; + case ObjectPresence::OnlyInUpstreamDB: + if (auto FaultInResult = faultInFromUpstream(Ref); !FaultInResult) + return FaultInResult.takeError(); + return true; + } +} + +Expected<OnDiskGraphDB::ObjectPresence> +OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef, + bool CheckUpstream) const { + InternalRef Ref = getInternalRef(ExternalRef); + auto I = getIndexProxyFromRef(Ref); + if (!I) + return I.takeError(); + + TrieRecord::Data Object = I->Ref.load(); + if (Object.SK != TrieRecord::StorageKind::Unknown) + return ObjectPresence::InPrimaryDB; + if (!CheckUpstream || !UpstreamDB) + return ObjectPresence::Missing; + std::optional<ObjectID> UpstreamID = + UpstreamDB->getExistingReference(getDigest(*I)); + return UpstreamID.has_value() ? ObjectPresence::OnlyInUpstreamDB + : ObjectPresence::Missing; +} + +InternalRef OnDiskGraphDB::makeInternalRef(FileOffset IndexOffset) { + return InternalRef::getFromOffset(IndexOffset); +} + +void OnDiskGraphDB::getStandalonePath(StringRef Prefix, const IndexProxy &I, + SmallVectorImpl<char> &Path) const { + Path.assign(RootPath.begin(), RootPath.end()); + sys::path::append(Path, + Prefix + Twine(I.Offset.get()) + "." + CASFormatVersion); +} + +OnDiskContent StandaloneDataInMemory::getContent() const { + bool Leaf0 = false; + bool Leaf = false; + switch (SK) { + default: + llvm_unreachable("Storage kind must be standalone"); + case TrieRecord::StorageKind::Standalone: + break; + case TrieRecord::StorageKind::StandaloneLeaf0: + Leaf = Leaf0 = true; + break; + case TrieRecord::StorageKind::StandaloneLeaf: + Leaf = true; + break; + } + + if (Leaf) { + StringRef Data(Region->data(), Region->size()); + assert(Data.drop_back(Leaf0).end()[0] == 0 && + "Standalone node data missing null termination"); + return OnDiskContent{std::nullopt, + arrayRefFromStringRef<char>(Data.drop_back(Leaf0))}; + } + + DataRecordHandle Record = DataRecordHandle::get(Region->data()); + assert(Record.getData().end()[0] == 0 && + "Standalone object record missing null termination for data"); + return OnDiskContent{Record, std::nullopt}; +} + +static Expected<MappedTempFile> createTempFile(StringRef FinalPath, + uint64_t Size) { + assert(Size && "Unexpected request for an empty temp file"); + Expected<TempFile> File = TempFile::create(FinalPath + ".%%%%%%"); + if (!File) + return File.takeError(); + + if (Error E = preallocateFileTail(File->FD, 0, Size).takeError()) + return createFileError(File->TmpName, std::move(E)); + + if (auto EC = sys::fs::resize_file_before_mapping_readwrite(File->FD, Size)) + return createFileError(File->TmpName, EC); + + std::error_code EC; + sys::fs::mapped_file_region Map(sys::fs::convertFDToNativeFile(File->FD), + sys::fs::mapped_file_region::readwrite, Size, + 0, EC); + if (EC) + return createFileError(File->TmpName, EC); + return MappedTempFile(std::move(*File), std::move(Map)); +} + +static size_t getPageSize() { + static int PageSize = sys::Process::getPageSizeEstimate(); + return PageSize; +} + +Error OnDiskGraphDB::createStandaloneLeaf(IndexProxy &I, ArrayRef<char> Data) { + assert(Data.size() > TrieRecord::MaxEmbeddedSize && + "Expected a bigger file for external content..."); + + bool Leaf0 = isAligned(Align(getPageSize()), Data.size()); + TrieRecord::StorageKind SK = Leaf0 ? TrieRecord::StorageKind::StandaloneLeaf0 + : TrieRecord::StorageKind::StandaloneLeaf; + + SmallString<256> Path; + int64_t FileSize = Data.size() + Leaf0; + getStandalonePath(TrieRecord::getStandaloneFilePrefix(SK), I, Path); + + // Write the file. Don't reuse this mapped_file_region, which is read/write. + // Let load() pull up one that's read-only. + Expected<MappedTempFile> File = createTempFile(Path, FileSize); + if (!File) + return File.takeError(); + assert(File->size() == (uint64_t)FileSize); + llvm::copy(Data, File->data()); + if (Leaf0) + File->data()[Data.size()] = 0; + assert(File->data()[Data.size()] == 0); + if (Error E = File->keep(Path)) + return E; + + // Store the object reference. + TrieRecord::Data Existing; + { + TrieRecord::Data Leaf{SK, FileOffset()}; + if (I.Ref.compare_exchange_strong(Existing, Leaf)) { + recordStandaloneSizeIncrease(FileSize); + return Error::success(); + } + } + + // If there was a race, confirm that the new value has valid storage. + if (Existing.SK == TrieRecord::StorageKind::Unknown) + return createCorruptObjectError(getDigest(I)); + + return Error::success(); +} + +Error OnDiskGraphDB::store(ObjectID ID, ArrayRef<ObjectID> Refs, + ArrayRef<char> Data) { + auto I = getIndexProxyFromRef(getInternalRef(ID)); + if (LLVM_UNLIKELY(!I)) + return I.takeError(); + + // Early return in case the node exists. + { + TrieRecord::Data Existing = I->Ref.load(); + if (Existing.SK != TrieRecord::StorageKind::Unknown) + return Error::success(); + } + + // Big leaf nodes. + if (Refs.empty() && Data.size() > TrieRecord::MaxEmbeddedSize) + return createStandaloneLeaf(*I, Data); + + // TODO: Check whether it's worth checking the index for an already existing + // object (like storeTreeImpl() does) before building up the + // InternalRefVector. + InternalRefVector InternalRefs; + for (ObjectID Ref : Refs) + InternalRefs.push_back(getInternalRef(Ref)); + + // Create the object. + + DataRecordHandle::Input Input{InternalRefs, Data}; + + // Compute the storage kind, allocate it, and create the record. + TrieRecord::StorageKind SK = TrieRecord::StorageKind::Unknown; + FileOffset PoolOffset; + SmallString<256> Path; + std::optional<MappedTempFile> File; + std::optional<uint64_t> FileSize; + auto AllocStandaloneFile = [&](size_t Size) -> Expected<char *> { + getStandalonePath(TrieRecord::getStandaloneFilePrefix( + TrieRecord::StorageKind::Standalone), + *I, Path); + if (Error E = createTempFile(Path, Size).moveInto(File)) + return std::move(E); + assert(File->size() == Size); + FileSize = Size; + SK = TrieRecord::StorageKind::Standalone; + return File->data(); + }; + auto Alloc = [&](size_t Size) -> Expected<char *> { + if (Size <= TrieRecord::MaxEmbeddedSize) { + SK = TrieRecord::StorageKind::DataPool; + auto P = DataPool.allocate(Size); + if (LLVM_UNLIKELY(!P)) { + char *NewAlloc = nullptr; + auto NewE = handleErrors( + P.takeError(), [&](std::unique_ptr<StringError> E) -> Error { + if (E->convertToErrorCode() == std::errc::not_enough_memory) + return AllocStandaloneFile(Size).moveInto(NewAlloc); + return Error(std::move(E)); + }); + if (!NewE) + return NewAlloc; + return std::move(NewE); + } + PoolOffset = P->getOffset(); + LLVM_DEBUG({ + dbgs() << "pool-alloc addr=" << (void *)PoolOffset.get() + << " size=" << Size + << " end=" << (void *)(PoolOffset.get() + Size) << "\n"; + }); + return (*P)->data(); + } + return AllocStandaloneFile(Size); + }; + + DataRecordHandle Record; + if (Error E = + DataRecordHandle::createWithError(Alloc, Input).moveInto(Record)) + return E; + assert(Record.getData().end()[0] == 0 && "Expected null-termination"); + assert(Record.getData() == Input.Data && "Expected initialization"); + assert(SK != TrieRecord::StorageKind::Unknown); + assert(bool(File) != bool(PoolOffset) && + "Expected either a mapped file or a pooled offset"); + + // Check for a race before calling MappedTempFile::keep(). + // + // Then decide what to do with the file. Better to discard than overwrite if + // another thread/process has already added this. + TrieRecord::Data Existing = I->Ref.load(); + { + TrieRecord::Data NewObject{SK, PoolOffset}; + if (File) { + if (Existing.SK == TrieRecord::StorageKind::Unknown) { + // Keep the file! + if (Error E = File->keep(Path)) + return E; + } else { + File.reset(); + } + } + + // If we didn't already see a racing/existing write, then try storing the + // new object. If that races, confirm that the new value has valid storage. + // + // TODO: Find a way to reuse the storage from the new-but-abandoned record + // handle. + if (Existing.SK == TrieRecord::StorageKind::Unknown) { + if (I->Ref.compare_exchange_strong(Existing, NewObject)) { + if (FileSize) + recordStandaloneSizeIncrease(*FileSize); + return Error::success(); + } + } + } + + if (Existing.SK == TrieRecord::StorageKind::Unknown) + return createCorruptObjectError(getDigest(*I)); + + // Load existing object. + return Error::success(); +} + +void OnDiskGraphDB::recordStandaloneSizeIncrease(size_t SizeIncrease) { + standaloneStorageSize().fetch_add(SizeIncrease, std::memory_order_relaxed); +} + +std::atomic<uint64_t> &OnDiskGraphDB::standaloneStorageSize() const { + MutableArrayRef<uint8_t> UserHeader = DataPool.getUserHeader(); + assert(UserHeader.size() == sizeof(std::atomic<uint64_t>)); + assert(isAddrAligned(Align(8), UserHeader.data())); + return *reinterpret_cast<std::atomic<uint64_t> *>(UserHeader.data()); +} + +uint64_t OnDiskGraphDB::getStandaloneStorageSize() const { + return standaloneStorageSize().load(std::memory_order_relaxed); +} + +size_t OnDiskGraphDB::getStorageSize() const { + return Index.size() + DataPool.size() + getStandaloneStorageSize(); +} + +unsigned OnDiskGraphDB::getHardStorageLimitUtilization() const { + unsigned IndexPercent = Index.size() * 100ULL / Index.capacity(); + unsigned DataPercent = DataPool.size() * 100ULL / DataPool.capacity(); + return std::max(IndexPercent, DataPercent); +} + +Expected<std::unique_ptr<OnDiskGraphDB>> OnDiskGraphDB::open( + StringRef AbsPath, StringRef HashName, unsigned HashByteSize, + std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy) { + if (std::error_code EC = sys::fs::create_directories(AbsPath)) + return createFileError(AbsPath, EC); + + constexpr uint64_t MB = 1024ull * 1024ull; + constexpr uint64_t GB = 1024ull * 1024ull * 1024ull; + + uint64_t MaxIndexSize = 12 * GB; + uint64_t MaxDataPoolSize = 24 * GB; + + if (useSmallMappingSize(AbsPath)) { + MaxIndexSize = 1 * GB; + MaxDataPoolSize = 2 * GB; + } + + auto CustomSize = getOverriddenMaxMappingSize(); + if (!CustomSize) + return CustomSize.takeError(); + if (*CustomSize) + MaxIndexSize = MaxDataPoolSize = **CustomSize; + + SmallString<256> IndexPath(AbsPath); + sys::path::append(IndexPath, IndexFilePrefix + CASFormatVersion); + std::optional<OnDiskTrieRawHashMap> Index; + if (Error E = OnDiskTrieRawHashMap::create( + IndexPath, IndexTableName + "[" + HashName + "]", + HashByteSize * CHAR_BIT, + /*DataSize=*/sizeof(TrieRecord), MaxIndexSize, + /*MinFileSize=*/MB) + .moveInto(Index)) + return std::move(E); + + uint32_t UserHeaderSize = sizeof(std::atomic<uint64_t>); + + SmallString<256> DataPoolPath(AbsPath); + sys::path::append(DataPoolPath, DataPoolFilePrefix + CASFormatVersion); + std::optional<OnDiskDataAllocator> DataPool; + StringRef PolicyName = + Policy == FaultInPolicy::SingleNode ? "single" : "full"; + if (Error E = OnDiskDataAllocator::create( + DataPoolPath, + DataPoolTableName + "[" + HashName + "]" + PolicyName, + MaxDataPoolSize, /*MinFileSize=*/MB, UserHeaderSize, + [](void *UserHeaderPtr) { + new (UserHeaderPtr) std::atomic<uint64_t>(0); + }) + .moveInto(DataPool)) + return std::move(E); + if (DataPool->getUserHeader().size() != UserHeaderSize) + return createStringError(llvm::errc::argument_out_of_domain, + "unexpected user header in '" + DataPoolPath + + "'"); + + return std::unique_ptr<OnDiskGraphDB>( + new OnDiskGraphDB(AbsPath, std::move(*Index), std::move(*DataPool), + std::move(UpstreamDB), Policy)); +} + +OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index, + OnDiskDataAllocator DataPool, + std::unique_ptr<OnDiskGraphDB> UpstreamDB, + FaultInPolicy Policy) + : Index(std::move(Index)), DataPool(std::move(DataPool)), + RootPath(RootPath.str()), UpstreamDB(std::move(UpstreamDB)), + FIPolicy(Policy) { + /// Lifetime for "big" objects not in DataPool. + /// + /// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something + /// simpler on the assumption there won't be much contention since most data + /// is not big. If there is contention, and we've already fixed ObjectProxy + /// object handles to be cheap enough to use consistently, the fix might be + /// to use better use of them rather than optimizing this map. + /// + /// FIXME: Figure out the right number of shards, if any. + StandaloneData = new StandaloneDataMapTy(); +} + +OnDiskGraphDB::~OnDiskGraphDB() { + delete static_cast<StandaloneDataMapTy *>(StandaloneData); +} + +Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID, + ObjectHandle UpstreamNode) { + // Copies the full CAS tree from upstream. Uses depth-first copying to protect + // against the process dying during importing and leaving the database with an + // incomplete tree. Note that if the upstream has missing nodes then the tree + // will be copied with missing nodes as well, it won't be considered an error. + + struct UpstreamCursor { + ObjectHandle Node; + size_t RefsCount; + object_refs_iterator RefI; + object_refs_iterator RefE; + }; + /// Keeps track of the state of visitation for current node and all of its + /// parents. + SmallVector<UpstreamCursor, 16> CursorStack; + /// Keeps track of the currently visited nodes as they are imported into + /// primary database, from current node and its parents. When a node is + /// entered for visitation it appends its own ID, then appends referenced IDs + /// as they get imported. When a node is fully imported it removes the + /// referenced IDs from the bottom of the stack which leaves its own ID at the + /// bottom, adding to the list of referenced IDs for the parent node. + SmallVector<ObjectID, 128> PrimaryNodesStack; + + auto enqueueNode = [&](ObjectID PrimaryID, std::optional<ObjectHandle> Node) { + PrimaryNodesStack.push_back(PrimaryID); + if (!Node) + return; + auto Refs = UpstreamDB->getObjectRefs(*Node); + CursorStack.push_back({*Node, + (size_t)std::distance(Refs.begin(), Refs.end()), + Refs.begin(), Refs.end()}); + }; + + enqueueNode(PrimaryID, UpstreamNode); + + while (!CursorStack.empty()) { + UpstreamCursor &Cur = CursorStack.back(); + if (Cur.RefI == Cur.RefE) { + // Copy the node data into the primary store. + // FIXME: Use hard-link or cloning if the file-system supports it and data + // is stored into a separate file. + + // The bottom of \p PrimaryNodesStack contains the primary ID for the + // current node plus the list of imported referenced IDs. + assert(PrimaryNodesStack.size() >= Cur.RefsCount + 1); + ObjectID PrimaryID = *(PrimaryNodesStack.end() - Cur.RefsCount - 1); + auto PrimaryRefs = ArrayRef(PrimaryNodesStack) + .slice(PrimaryNodesStack.size() - Cur.RefsCount); + auto Data = UpstreamDB->getObjectData(Cur.Node); + if (Error E = store(PrimaryID, PrimaryRefs, Data)) + return E; + // Remove the current node and its IDs from the stack. + PrimaryNodesStack.truncate(PrimaryNodesStack.size() - Cur.RefsCount); + CursorStack.pop_back(); + continue; + } + + ObjectID UpstreamID = *(Cur.RefI++); + auto PrimaryID = getReference(UpstreamDB->getDigest(UpstreamID)); + if (LLVM_UNLIKELY(!PrimaryID)) + return PrimaryID.takeError(); + if (containsObject(*PrimaryID, /*CheckUpstream=*/false)) { + // This \p ObjectID already exists in the primary. Either it was imported + // via \p importFullTree or the client created it, in which case the + // client takes responsibility for how it was formed. + enqueueNode(*PrimaryID, std::nullopt); + continue; + } + Expected<std::optional<ObjectHandle>> UpstreamNode = + UpstreamDB->load(UpstreamID); + if (!UpstreamNode) + return UpstreamNode.takeError(); + enqueueNode(*PrimaryID, *UpstreamNode); + } + + assert(PrimaryNodesStack.size() == 1); + assert(PrimaryNodesStack.front() == PrimaryID); + return Error::success(); +} + +Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID, + ObjectHandle UpstreamNode) { + // Copies only a single node, it doesn't copy the referenced nodes. + + // Copy the node data into the primary store. + // FIXME: Use hard-link or cloning if the file-system supports it and data is + // stored into a separate file. + + auto Data = UpstreamDB->getObjectData(UpstreamNode); + auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode); + SmallVector<ObjectID, 64> Refs; + Refs.reserve(std::distance(UpstreamRefs.begin(), UpstreamRefs.end())); + for (ObjectID UpstreamRef : UpstreamRefs) { + auto Ref = getReference(UpstreamDB->getDigest(UpstreamRef)); + if (LLVM_UNLIKELY(!Ref)) + return Ref.takeError(); + Refs.push_back(*Ref); + } + + return store(PrimaryID, Refs, Data); +} + +Expected<std::optional<ObjectHandle>> +OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) { + assert(UpstreamDB); + + auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID)); + if (LLVM_UNLIKELY(!UpstreamID)) + return UpstreamID.takeError(); + + Expected<std::optional<ObjectHandle>> UpstreamNode = + UpstreamDB->load(*UpstreamID); + if (!UpstreamNode) + return UpstreamNode.takeError(); + if (!*UpstreamNode) + return std::nullopt; + + if (Error E = FIPolicy == FaultInPolicy::SingleNode + ? importSingleNode(PrimaryID, **UpstreamNode) + : importFullTree(PrimaryID, **UpstreamNode)) + return std::move(E); + return load(PrimaryID); +} diff --git a/llvm/lib/CAS/OnDiskKeyValueDB.cpp b/llvm/lib/CAS/OnDiskKeyValueDB.cpp new file mode 100644 index 0000000..2186071 --- /dev/null +++ b/llvm/lib/CAS/OnDiskKeyValueDB.cpp @@ -0,0 +1,113 @@ +//===- OnDiskKeyValueDB.cpp -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file implements OnDiskKeyValueDB, an ondisk key value database. +/// +/// The KeyValue database file is named `actions.<version>` inside the CAS +/// directory. The database stores a mapping between a fixed-sized key and a +/// fixed-sized value, where the size of key and value can be configured when +/// opening the database. +/// +// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/OnDiskKeyValueDB.h" +#include "OnDiskCommon.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Path.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +static constexpr StringLiteral ActionCacheFile = "actions."; + +Expected<ArrayRef<char>> OnDiskKeyValueDB::put(ArrayRef<uint8_t> Key, + ArrayRef<char> Value) { + if (LLVM_UNLIKELY(Value.size() != ValueSize)) + return createStringError(errc::invalid_argument, + "expected value size of " + itostr(ValueSize) + + ", got: " + itostr(Value.size())); + assert(Value.size() == ValueSize); + auto ActionP = Cache.insertLazy( + Key, [&](FileOffset TentativeOffset, + OnDiskTrieRawHashMap::ValueProxy TentativeValue) { + assert(TentativeValue.Data.size() == ValueSize); + llvm::copy(Value, TentativeValue.Data.data()); + }); + if (LLVM_UNLIKELY(!ActionP)) + return ActionP.takeError(); + return (*ActionP)->Data; +} + +Expected<std::optional<ArrayRef<char>>> +OnDiskKeyValueDB::get(ArrayRef<uint8_t> Key) { + // Check the result cache. + OnDiskTrieRawHashMap::ConstOnDiskPtr ActionP = Cache.find(Key); + if (!ActionP) + return std::nullopt; + assert(isAddrAligned(Align(8), ActionP->Data.data())); + return ActionP->Data; +} + +Expected<std::unique_ptr<OnDiskKeyValueDB>> +OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize, + StringRef ValueName, size_t ValueSize) { + if (std::error_code EC = sys::fs::create_directories(Path)) + return createFileError(Path, EC); + + SmallString<256> CachePath(Path); + sys::path::append(CachePath, ActionCacheFile + CASFormatVersion); + constexpr uint64_t MB = 1024ull * 1024ull; + constexpr uint64_t GB = 1024ull * 1024ull * 1024ull; + + uint64_t MaxFileSize = GB; + auto CustomSize = getOverriddenMaxMappingSize(); + if (!CustomSize) + return CustomSize.takeError(); + if (*CustomSize) + MaxFileSize = **CustomSize; + + std::optional<OnDiskTrieRawHashMap> ActionCache; + if (Error E = OnDiskTrieRawHashMap::create( + CachePath, + "llvm.actioncache[" + HashName + "->" + ValueName + "]", + KeySize * 8, + /*DataSize=*/ValueSize, MaxFileSize, /*MinFileSize=*/MB) + .moveInto(ActionCache)) + return std::move(E); + + return std::unique_ptr<OnDiskKeyValueDB>( + new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache))); +} + +Error OnDiskKeyValueDB::validate(CheckValueT CheckValue) const { + return Cache.validate( + [&](FileOffset Offset, + OnDiskTrieRawHashMap::ConstValueProxy Record) -> Error { + auto formatError = [&](Twine Msg) { + return createStringError( + llvm::errc::illegal_byte_sequence, + "bad cache value at 0x" + + utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " + + Msg.str()); + }; + + if (Record.Data.size() != ValueSize) + return formatError("wrong cache value size"); + if (!isAddrAligned(Align(8), Record.Data.data())) + return formatError("wrong cache value alignment"); + if (CheckValue) + return CheckValue(Offset, Record.Data); + return Error::success(); + }); +} diff --git a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp index 8052773..8637b55 100644 --- a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp +++ b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp @@ -2427,11 +2427,13 @@ void DWARFLinker::DIECloner::generateLineTableForUnit(CompileUnit &Unit) { uint64_t OrigStmtSeq = StmtSeq.get(); // 1. Get the original row index from the stmt list offset. auto OrigRowIter = SeqOffToOrigRow.find(OrigStmtSeq); + const uint64_t InvalidOffset = + Unit.getOrigUnit().getFormParams().getDwarfMaxOffset(); // Check whether we have an output sequence for the StmtSeq offset. // Some sequences are discarded by the DWARFLinker if they are invalid // (empty). if (OrigRowIter == SeqOffToOrigRow.end()) { - StmtSeq.set(UINT64_MAX); + StmtSeq.set(InvalidOffset); continue; } size_t OrigRowIndex = OrigRowIter->second; @@ -2441,7 +2443,7 @@ void DWARFLinker::DIECloner::generateLineTableForUnit(CompileUnit &Unit) { if (NewRowIter == OrigRowToNewRow.end()) { // If the original row index is not found in the map, update the // stmt_sequence attribute to the 'invalid offset' magic value. - StmtSeq.set(UINT64_MAX); + StmtSeq.set(InvalidOffset); continue; } diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp index fa39603..a326a01 100644 --- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp +++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp @@ -320,12 +320,16 @@ static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI, // Attempt to retrieve DW_AT_LLVM_stmt_sequence if present. std::optional<uint64_t> StmtSeqOffset; if (auto StmtSeqAttr = Die.find(llvm::dwarf::DW_AT_LLVM_stmt_sequence)) { - // The `DW_AT_LLVM_stmt_sequence` attribute might be set to `UINT64_MAX` - // when it refers to an empty line sequence. In such cases, the DWARF linker - // will exclude the empty sequence from the final output and assign - // `UINT64_MAX` to the `DW_AT_LLVM_stmt_sequence` attribute. - uint64_t StmtSeqVal = dwarf::toSectionOffset(StmtSeqAttr, UINT64_MAX); - if (StmtSeqVal != UINT64_MAX) + // The `DW_AT_LLVM_stmt_sequence` attribute might be set to an invalid + // sentinel value when it refers to an empty line sequence. In such cases, + // the DWARF linker will exclude the empty sequence from the final output + // and assign the sentinel value to the `DW_AT_LLVM_stmt_sequence` + // attribute. The sentinel value is UINT32_MAX for DWARF32 and UINT64_MAX + // for DWARF64. + const uint64_t InvalidOffset = + Die.getDwarfUnit()->getFormParams().getDwarfMaxOffset(); + uint64_t StmtSeqVal = dwarf::toSectionOffset(StmtSeqAttr, InvalidOffset); + if (StmtSeqVal != InvalidOffset) StmtSeqOffset = StmtSeqVal; } diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 7f0ea78..d4901d9 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -2903,7 +2903,7 @@ bool MasmParser::parseIdentifier(StringRef &Res, if (Position == StartOfStatement && StringSwitch<bool>(Res) .CaseLower("echo", true) - .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true) + .CasesLower({"ifdef", "ifndef", "elseifdef", "elseifndef"}, true) .Default(false)) { ExpandNextToken = DoNotExpandMacros; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 12c600f..d5117da 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -700,7 +700,7 @@ static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) { // csel instruction. If so, return the folded opcode, and the replacement // register. static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, - unsigned *NewVReg = nullptr) { + unsigned *NewReg = nullptr) { VReg = removeCopies(MRI, VReg); if (!Register::isVirtualRegister(VReg)) return 0; @@ -708,8 +708,37 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg)); const MachineInstr *DefMI = MRI.getVRegDef(VReg); unsigned Opc = 0; - unsigned SrcOpNum = 0; + unsigned SrcReg = 0; switch (DefMI->getOpcode()) { + case AArch64::SUBREG_TO_REG: + // Check for the following way to define an 64-bit immediate: + // %0:gpr32 = MOVi32imm 1 + // %1:gpr64 = SUBREG_TO_REG 0, %0:gpr32, %subreg.sub_32 + if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 0) + return 0; + if (!DefMI->getOperand(2).isReg()) + return 0; + if (!DefMI->getOperand(3).isImm() || + DefMI->getOperand(3).getImm() != AArch64::sub_32) + return 0; + DefMI = MRI.getVRegDef(DefMI->getOperand(2).getReg()); + if (DefMI->getOpcode() != AArch64::MOVi32imm) + return 0; + if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1) + return 0; + assert(Is64Bit); + SrcReg = AArch64::XZR; + Opc = AArch64::CSINCXr; + break; + + case AArch64::MOVi32imm: + case AArch64::MOVi64imm: + if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1) + return 0; + SrcReg = Is64Bit ? AArch64::XZR : AArch64::WZR; + Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr; + break; + case AArch64::ADDSXri: case AArch64::ADDSWri: // if NZCV is used, do not fold. @@ -724,7 +753,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 || DefMI->getOperand(3).getImm() != 0) return 0; - SrcOpNum = 1; + SrcReg = DefMI->getOperand(1).getReg(); Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr; break; @@ -734,7 +763,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); if (ZReg != AArch64::XZR && ZReg != AArch64::WZR) return 0; - SrcOpNum = 2; + SrcReg = DefMI->getOperand(2).getReg(); Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr; break; } @@ -753,17 +782,17 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); if (ZReg != AArch64::XZR && ZReg != AArch64::WZR) return 0; - SrcOpNum = 2; + SrcReg = DefMI->getOperand(2).getReg(); Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr; break; } default: return 0; } - assert(Opc && SrcOpNum && "Missing parameters"); + assert(Opc && SrcReg && "Missing parameters"); - if (NewVReg) - *NewVReg = DefMI->getOperand(SrcOpNum).getReg(); + if (NewReg) + *NewReg = SrcReg; return Opc; } @@ -964,28 +993,34 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB, // Try folding simple instructions into the csel. if (TryFold) { - unsigned NewVReg = 0; - unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg); + unsigned NewReg = 0; + unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewReg); if (FoldedOpc) { // The folded opcodes csinc, csinc and csneg apply the operation to // FalseReg, so we need to invert the condition. CC = AArch64CC::getInvertedCondCode(CC); TrueReg = FalseReg; } else - FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg); + FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewReg); // Fold the operation. Leave any dead instructions for DCE to clean up. if (FoldedOpc) { - FalseReg = NewVReg; + FalseReg = NewReg; Opc = FoldedOpc; - // The extends the live range of NewVReg. - MRI.clearKillFlags(NewVReg); + // Extend the live range of NewReg. + MRI.clearKillFlags(NewReg); } } // Pull all virtual register into the appropriate class. MRI.constrainRegClass(TrueReg, RC); - MRI.constrainRegClass(FalseReg, RC); + // FalseReg might be WZR or XZR if the folded operand is a literal 1. + assert( + (FalseReg.isVirtual() || FalseReg == AArch64::WZR || + FalseReg == AArch64::XZR) && + "FalseReg was folded into a non-virtual register other than WZR or XZR"); + if (FalseReg.isVirtual()) + MRI.constrainRegClass(FalseReg, RC); // Insert the csel. BuildMI(MBB, I, DL, get(Opc), DstReg) @@ -5063,7 +5098,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, bool RenamableDest, bool RenamableSrc) const { if (AArch64::GPR32spRegClass.contains(DestReg) && - (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) { + AArch64::GPR32spRegClass.contains(SrcReg)) { if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { // If either operand is WSP, expand to ADD #0. if (Subtarget.hasZeroCycleRegMoveGPR64() && @@ -5088,21 +5123,14 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addImm(0) .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } - } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGPR32()) { - BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg) - .addImm(0) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } else if (Subtarget.hasZeroCycleRegMoveGPR64() && !Subtarget.hasZeroCycleRegMoveGPR32()) { // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass); assert(DestRegX.isValid() && "Destination super-reg not valid"); - MCRegister SrcRegX = - SrcReg == AArch64::WZR - ? AArch64::XZR - : RI.getMatchingSuperReg(SrcReg, AArch64::sub_32, - &AArch64::GPR64spRegClass); + MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); assert(SrcRegX.isValid() && "Source super-reg not valid"); // This instruction is reading and writing X registers. This may upset // the register scavenger and machine verifier, so we need to indicate @@ -5121,6 +5149,51 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } + // GPR32 zeroing + if (AArch64::GPR32spRegClass.contains(DestReg) && SrcReg == AArch64::WZR) { + if (Subtarget.hasZeroCycleZeroingGPR32()) { + BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg) + .addImm(0) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + } else { + BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg) + .addReg(AArch64::WZR) + .addReg(AArch64::WZR); + } + return; + } + + if (AArch64::GPR64spRegClass.contains(DestReg) && + AArch64::GPR64spRegClass.contains(SrcReg)) { + if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { + // If either operand is SP, expand to ADD #0. + BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(0) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + } else { + // Otherwise, expand to ORR XZR. + BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg) + .addReg(AArch64::XZR) + .addReg(SrcReg, getKillRegState(KillSrc)); + } + return; + } + + // GPR64 zeroing + if (AArch64::GPR64spRegClass.contains(DestReg) && SrcReg == AArch64::XZR) { + if (Subtarget.hasZeroCycleZeroingGPR64()) { + BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg) + .addImm(0) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + } else { + BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg) + .addReg(AArch64::XZR) + .addReg(AArch64::XZR); + } + return; + } + // Copy a Predicate register by ORRing with itself. if (AArch64::PPRRegClass.contains(DestReg) && AArch64::PPRRegClass.contains(SrcReg)) { @@ -5205,27 +5278,6 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - if (AArch64::GPR64spRegClass.contains(DestReg) && - (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) { - if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { - // If either operand is SP, expand to ADD #0. - BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)) - .addImm(0) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); - } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGPR64()) { - BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg) - .addImm(0) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); - } else { - // Otherwise, expand to ORR XZR. - BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg) - .addReg(AArch64::XZR) - .addReg(SrcReg, getKillRegState(KillSrc)); - } - return; - } - // Copy a DDDD register quad by copying the individual sub-registers. if (AArch64::DDDDRegClass.contains(DestReg) && AArch64::DDDDRegClass.contains(SrcReg)) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 12915c73..97c2c9c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3446,10 +3446,14 @@ bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const { : 0); // swz MachineMemOperand *LoadMMO = *MI.memoperands_begin(); + // Don't set the offset value here because the pointer points to the base of + // the buffer. MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo(); - LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm(); + MachinePointerInfo StorePtrI = LoadPtrI; - StorePtrI.V = nullptr; + LoadPtrI.V = PoisonValue::get(PointerType::get(MF->getFunction().getContext(), + AMDGPUAS::BUFFER_RESOURCE)); + LoadPtrI.AddrSpace = AMDGPUAS::BUFFER_RESOURCE; StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS; auto F = LoadMMO->getFlags() & @@ -3627,13 +3631,17 @@ bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{ if (isSGPR(Addr)) MIB.addReg(VOffset); - MIB.add(MI.getOperand(4)) // offset - .add(MI.getOperand(5)); // cpol + MIB.add(MI.getOperand(4)); // offset + + unsigned Aux = MI.getOperand(5).getImm(); + MIB.addImm(Aux & ~AMDGPU::CPol::VIRTUAL_BITS); // cpol MachineMemOperand *LoadMMO = *MI.memoperands_begin(); MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo(); LoadPtrI.Offset = MI.getOperand(4).getImm(); MachinePointerInfo StorePtrI = LoadPtrI; + LoadPtrI.V = PoisonValue::get(PointerType::get(MF->getFunction().getContext(), + AMDGPUAS::GLOBAL_ADDRESS)); LoadPtrI.AddrSpace = AMDGPUAS::GLOBAL_ADDRESS; StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS; auto F = LoadMMO->getFlags() & diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index ecc2824..b7a92a0 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -423,6 +423,9 @@ enum CPol { // Volatile (used to preserve/signal operation volatility for buffer // operations not a real instruction bit) VOLATILE = 1 << 31, + // The set of "cache policy" bits used for compiler features that + // do not correspond to handware features. + VIRTUAL_BITS = VOLATILE, }; } // namespace CPol diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index a2841c11..a757421 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1651,6 +1651,9 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8); Info.ptrVal = CI.getArgOperand(1); Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + auto *Aux = cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1)); + if (Aux->getZExtValue() & AMDGPU::CPol::VOLATILE) + Info.flags |= MachineMemOperand::MOVolatile; return true; } case Intrinsic::amdgcn_ds_bvh_stack_rtn: @@ -11219,8 +11222,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, MachinePointerInfo StorePtrI = LoadPtrI; LoadPtrI.V = PoisonValue::get( - PointerType::get(*DAG.getContext(), AMDGPUAS::GLOBAL_ADDRESS)); - LoadPtrI.AddrSpace = AMDGPUAS::GLOBAL_ADDRESS; + PointerType::get(*DAG.getContext(), AMDGPUAS::BUFFER_RESOURCE)); + LoadPtrI.AddrSpace = AMDGPUAS::BUFFER_RESOURCE; StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS; auto F = LoadMMO->getFlags() & @@ -11307,7 +11310,11 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, } Ops.push_back(Op.getOperand(5)); // Offset - Ops.push_back(Op.getOperand(6)); // CPol + + unsigned Aux = Op.getConstantOperandVal(6); + Ops.push_back(DAG.getTargetConstant(Aux & ~AMDGPU::CPol::VIRTUAL_BITS, DL, + MVT::i32)); // CPol + Ops.push_back(M0Val.getValue(0)); // Chain Ops.push_back(M0Val.getValue(1)); // Glue diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 362ef14..bdbc000 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/MemoryModelRelaxationAnnotations.h" #include "llvm/IR/PassManager.h" +#include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/TargetParser/TargetParser.h" @@ -277,6 +278,12 @@ public: /// rmw operation, "std::nullopt" otherwise. std::optional<SIMemOpInfo> getAtomicCmpxchgOrRmwInfo(const MachineBasicBlock::iterator &MI) const; + + /// \returns DMA to LDS info if \p MI is as a direct-to/from-LDS load/store, + /// along with an indication of whether this is a load or store. If it is not + /// a direct-to-LDS operation, returns std::nullopt. + std::optional<SIMemOpInfo> + getLDSDMAInfo(const MachineBasicBlock::iterator &MI) const; }; class SICacheControl { @@ -703,6 +710,9 @@ private: /// instructions are added/deleted or \p MI is modified, false otherwise. bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI); + /// Expands LDS DMA operation \p MI. Returns true if instructions are + /// added/deleted or \p MI is modified, false otherwise. + bool expandLDSDMA(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI); public: SIMemoryLegalizer(const MachineModuleInfo &MMI) : MMI(MMI) {}; @@ -832,6 +842,9 @@ SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const { return SIAtomicAddrSpace::SCRATCH; if (AS == AMDGPUAS::REGION_ADDRESS) return SIAtomicAddrSpace::GDS; + if (AS == AMDGPUAS::BUFFER_FAT_POINTER || AS == AMDGPUAS::BUFFER_RESOURCE || + AS == AMDGPUAS::BUFFER_STRIDED_POINTER) + return SIAtomicAddrSpace::GLOBAL; return SIAtomicAddrSpace::OTHER; } @@ -987,6 +1000,16 @@ std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo( return constructFromMIWithMMO(MI); } +std::optional<SIMemOpInfo> +SIMemOpAccess::getLDSDMAInfo(const MachineBasicBlock::iterator &MI) const { + assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic); + + if (!SIInstrInfo::isLDSDMA(*MI)) + return std::nullopt; + + return constructFromMIWithMMO(MI); +} + SICacheControl::SICacheControl(const GCNSubtarget &ST) : ST(ST) { TII = ST.getInstrInfo(); IV = getIsaVersion(ST.getCPU()); @@ -1099,7 +1122,7 @@ bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal( // Only handle load and store, not atomic read-modify-write insructions. The // latter use glc to indicate if the atomic returns a result and so must not // be used for cache control. - assert(MI->mayLoad() ^ MI->mayStore()); + assert((MI->mayLoad() ^ MI->mayStore()) || SIInstrInfo::isLDSDMA(*MI)); // Only update load and store, not LLVM IR atomic read-modify-write // instructions. The latter are always marked as volatile so cannot sensibly @@ -1429,7 +1452,7 @@ bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal( // Only handle load and store, not atomic read-modify-write insructions. The // latter use glc to indicate if the atomic returns a result and so must not // be used for cache control. - assert(MI->mayLoad() ^ MI->mayStore()); + assert((MI->mayLoad() ^ MI->mayStore()) || SIInstrInfo::isLDSDMA(*MI)); // Only update load and store, not LLVM IR atomic read-modify-write // instructions. The latter are always marked as volatile so cannot sensibly @@ -1733,7 +1756,7 @@ bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal( // Only handle load and store, not atomic read-modify-write insructions. The // latter use glc to indicate if the atomic returns a result and so must not // be used for cache control. - assert(MI->mayLoad() ^ MI->mayStore()); + assert((MI->mayLoad() ^ MI->mayStore()) || SIInstrInfo::isLDSDMA(*MI)); // Only update load and store, not LLVM IR atomic read-modify-write // instructions. The latter are always marked as volatile so cannot sensibly @@ -1968,7 +1991,7 @@ bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal( // Only handle load and store, not atomic read-modify-write insructions. The // latter use glc to indicate if the atomic returns a result and so must not // be used for cache control. - assert(MI->mayLoad() ^ MI->mayStore()); + assert((MI->mayLoad() ^ MI->mayStore()) || SIInstrInfo::isLDSDMA(*MI)); // Only update load and store, not LLVM IR atomic read-modify-write // instructions. The latter are always marked as volatile so cannot sensibly @@ -2266,7 +2289,7 @@ bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal( // Only handle load and store, not atomic read-modify-write insructions. The // latter use glc to indicate if the atomic returns a result and so must not // be used for cache control. - assert(MI->mayLoad() ^ MI->mayStore()); + assert((MI->mayLoad() ^ MI->mayStore()) || SIInstrInfo::isLDSDMA(*MI)); // Only update load and store, not LLVM IR atomic read-modify-write // instructions. The latter are always marked as volatile so cannot sensibly @@ -2611,7 +2634,7 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal( bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const { // Only handle load and store, not atomic read-modify-write instructions. - assert(MI->mayLoad() ^ MI->mayStore()); + assert((MI->mayLoad() ^ MI->mayStore()) || SIInstrInfo::isLDSDMA(*MI)); // Only update load and store, not LLVM IR atomic read-modify-write // instructions. The latter are always marked as volatile so cannot sensibly @@ -2934,6 +2957,23 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI, return Changed; } +bool SIMemoryLegalizer::expandLDSDMA(const SIMemOpInfo &MOI, + MachineBasicBlock::iterator &MI) { + assert(MI->mayLoad() && MI->mayStore()); + + // The volatility or nontemporal-ness of the operation is a + // function of the global memory, not the LDS. + SIMemOp OpKind = + SIInstrInfo::mayWriteLDSThroughDMA(*MI) ? SIMemOp::LOAD : SIMemOp::STORE; + + // Handle volatile and/or nontemporal markers on direct-to-LDS loads and + // stores. The operation is treated as a volatile/nontemporal store + // to its second argument. + return CC->enableVolatileAndOrNonTemporal( + MI, MOI.getInstrAddrSpace(), OpKind, MOI.isVolatile(), + MOI.isNonTemporal(), MOI.isLastUse()); +} + bool SIMemoryLegalizerLegacy::runOnMachineFunction(MachineFunction &MF) { const MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); @@ -2985,14 +3025,17 @@ bool SIMemoryLegalizer::run(MachineFunction &MF) { if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic)) continue; - if (const auto &MOI = MOA.getLoadInfo(MI)) + if (const auto &MOI = MOA.getLoadInfo(MI)) { Changed |= expandLoad(*MOI, MI); - else if (const auto &MOI = MOA.getStoreInfo(MI)) { + } else if (const auto &MOI = MOA.getStoreInfo(MI)) { Changed |= expandStore(*MOI, MI); - } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI)) + } else if (const auto &MOI = MOA.getLDSDMAInfo(MI)) { + Changed |= expandLDSDMA(*MOI, MI); + } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI)) { Changed |= expandAtomicFence(*MOI, MI); - else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI)) + } else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI)) { Changed |= expandAtomicCmpxchgOrRmw(*MOI, MI); + } } } diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index ba4b489..9b5fc9d 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -14,6 +14,7 @@ #include "BPF.h" #include "BPFCORE.h" #include "MCTargetDesc/BPFMCTargetDesc.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -23,6 +24,7 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -301,21 +303,59 @@ void BTFTypeStruct::completeType(BTFDebug &BDebug) { BTFType.NameOff = BDebug.addString(STy->getName()); + if (STy->getTag() == dwarf::DW_TAG_variant_part) { + // Variant parts might have a discriminator, which has its own memory + // location, and variants, which share the memory location afterwards. LLVM + // DI doesn't consider discriminator as an element and instead keeps + // it as a separate reference. + // To keep BTF simple, let's represent the structure as an union with + // discriminator as the first element. + // The offsets inside variant types are already handled correctly in the + // DI. + const auto *DTy = STy->getDiscriminator(); + if (DTy) { + struct BTF::BTFMember Discriminator; + + Discriminator.NameOff = BDebug.addString(DTy->getName()); + Discriminator.Offset = DTy->getOffsetInBits(); + const auto *BaseTy = DTy->getBaseType(); + Discriminator.Type = BDebug.getTypeId(BaseTy); + + Members.push_back(Discriminator); + } + } + // Add struct/union members. const DINodeArray Elements = STy->getElements(); for (const auto *Element : Elements) { struct BTF::BTFMember BTFMember; - const auto *DDTy = cast<DIDerivedType>(Element); - BTFMember.NameOff = BDebug.addString(DDTy->getName()); - if (HasBitField) { - uint8_t BitFieldSize = DDTy->isBitField() ? DDTy->getSizeInBits() : 0; - BTFMember.Offset = BitFieldSize << 24 | DDTy->getOffsetInBits(); - } else { - BTFMember.Offset = DDTy->getOffsetInBits(); + switch (Element->getTag()) { + case dwarf::DW_TAG_member: { + const auto *DDTy = cast<DIDerivedType>(Element); + + BTFMember.NameOff = BDebug.addString(DDTy->getName()); + if (HasBitField) { + uint8_t BitFieldSize = DDTy->isBitField() ? DDTy->getSizeInBits() : 0; + BTFMember.Offset = BitFieldSize << 24 | DDTy->getOffsetInBits(); + } else { + BTFMember.Offset = DDTy->getOffsetInBits(); + } + const auto *BaseTy = tryRemoveAtomicType(DDTy->getBaseType()); + BTFMember.Type = BDebug.getTypeId(BaseTy); + break; + } + case dwarf::DW_TAG_variant_part: { + const auto *DCTy = dyn_cast<DICompositeType>(Element); + + BTFMember.NameOff = BDebug.addString(DCTy->getName()); + BTFMember.Offset = DCTy->getOffsetInBits(); + BTFMember.Type = BDebug.getTypeId(DCTy); + break; + } + default: + llvm_unreachable("Unexpected DI tag of a struct/union element"); } - const auto *BaseTy = tryRemoveAtomicType(DDTy->getBaseType()); - BTFMember.Type = BDebug.getTypeId(BaseTy); Members.push_back(BTFMember); } } @@ -672,16 +712,28 @@ void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct, uint32_t &TypeId) { const DINodeArray Elements = CTy->getElements(); uint32_t VLen = Elements.size(); + // Variant parts might have a discriminator. LLVM DI doesn't consider it as + // an element and instead keeps it as a separate reference. But we represent + // it as an element in BTF. + if (CTy->getTag() == dwarf::DW_TAG_variant_part) { + const auto *DTy = CTy->getDiscriminator(); + if (DTy) { + visitTypeEntry(DTy); + VLen++; + } + } if (VLen > BTF::MAX_VLEN) return; // Check whether we have any bitfield members or not bool HasBitField = false; for (const auto *Element : Elements) { - auto E = cast<DIDerivedType>(Element); - if (E->isBitField()) { - HasBitField = true; - break; + if (Element->getTag() == dwarf::DW_TAG_member) { + auto E = cast<DIDerivedType>(Element); + if (E->isBitField()) { + HasBitField = true; + break; + } } } @@ -696,9 +748,22 @@ void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct, // Visit all struct members. int FieldNo = 0; for (const auto *Element : Elements) { - const auto Elem = cast<DIDerivedType>(Element); - visitTypeEntry(Elem); - processDeclAnnotations(Elem->getAnnotations(), TypeId, FieldNo); + switch (Element->getTag()) { + case dwarf::DW_TAG_member: { + const auto Elem = cast<DIDerivedType>(Element); + visitTypeEntry(Elem); + processDeclAnnotations(Elem->getAnnotations(), TypeId, FieldNo); + break; + } + case dwarf::DW_TAG_variant_part: { + const auto Elem = cast<DICompositeType>(Element); + visitTypeEntry(Elem); + processDeclAnnotations(Elem->getAnnotations(), TypeId, FieldNo); + break; + } + default: + llvm_unreachable("Unexpected DI tag of a struct/union element"); + } FieldNo++; } } @@ -781,16 +846,25 @@ void BTFDebug::visitFwdDeclType(const DICompositeType *CTy, bool IsUnion, void BTFDebug::visitCompositeType(const DICompositeType *CTy, uint32_t &TypeId) { auto Tag = CTy->getTag(); - if (Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { + switch (Tag) { + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_variant_part: // Handle forward declaration differently as it does not have members. if (CTy->isForwardDecl()) visitFwdDeclType(CTy, Tag == dwarf::DW_TAG_union_type, TypeId); else visitStructType(CTy, Tag == dwarf::DW_TAG_structure_type, TypeId); - } else if (Tag == dwarf::DW_TAG_array_type) + break; + case dwarf::DW_TAG_array_type: visitArrayType(CTy, TypeId); - else if (Tag == dwarf::DW_TAG_enumeration_type) + break; + case dwarf::DW_TAG_enumeration_type: visitEnumType(CTy, TypeId); + break; + default: + llvm_unreachable("Unexpected DI tag of a composite type"); + } } bool BTFDebug::IsForwardDeclCandidate(const DIType *Base) { diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp index ab93bba..b00589a 100644 --- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp +++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp @@ -68,7 +68,7 @@ const llvm::StringRef RISCVSEWInstrument::DESC_NAME = "RISCV-SEW"; bool RISCVSEWInstrument::isDataValid(llvm::StringRef Data) { // Return true if not one of the valid SEW strings return StringSwitch<bool>(Data) - .Cases("E8", "E16", "E32", "E64", true) + .Cases({"E8", "E16", "E32", "E64"}, true) .Default(false); } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 66717b9..7c89686 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1511,16 +1511,16 @@ def GIShiftMask32 : GIComplexOperandMatcher<s64, "selectShiftMask32">, GIComplexPatternEquiv<shiftMask32>; -class shiftop<SDPatternOperator operator> - : PatFrag<(ops node:$val, node:$count), - (operator node:$val, (XLenVT (shiftMaskXLen node:$count)))>; -class shiftopw<SDPatternOperator operator> - : PatFrag<(ops node:$val, node:$count), - (operator node:$val, (i64 (shiftMask32 node:$count)))>; +class PatGprShiftMaskXLen<SDPatternOperator OpNode, RVInst Inst> + : Pat<(OpNode GPR:$rs1, shiftMaskXLen:$rs2), + (Inst GPR:$rs1, shiftMaskXLen:$rs2)>; +class PatGprShiftMask32<SDPatternOperator OpNode, RVInst Inst> + : Pat<(OpNode GPR:$rs1, shiftMask32:$rs2), + (Inst GPR:$rs1, shiftMask32:$rs2)>; -def : PatGprGpr<shiftop<shl>, SLL>; -def : PatGprGpr<shiftop<srl>, SRL>; -def : PatGprGpr<shiftop<sra>, SRA>; +def : PatGprShiftMaskXLen<shl, SLL>; +def : PatGprShiftMaskXLen<srl, SRL>; +def : PatGprShiftMaskXLen<sra, SRA>; // This is a special case of the ADD instruction used to facilitate the use of a // fourth operand to emit a relocation on a symbol relating to this instruction. @@ -2203,9 +2203,9 @@ def : Pat<(sra (sext_inreg GPR:$rs1, i32), uimm5:$shamt), def : Pat<(i64 (sra (shl GPR:$rs1, (i64 32)), uimm6gt32:$shamt)), (SRAIW GPR:$rs1, (ImmSub32 uimm6gt32:$shamt))>; -def : PatGprGpr<shiftopw<riscv_sllw>, SLLW>; -def : PatGprGpr<shiftopw<riscv_srlw>, SRLW>; -def : PatGprGpr<shiftopw<riscv_sraw>, SRAW>; +def : PatGprShiftMask32<riscv_sllw, SLLW>; +def : PatGprShiftMask32<riscv_srlw, SRLW>; +def : PatGprShiftMask32<riscv_sraw, SRAW>; // Select W instructions if only the lower 32 bits of the result are used. def : PatGprGpr<binop_allwusers<add>, ADDW>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index 57fbaa0..40d7341 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -506,8 +506,8 @@ def : Pat<(XLenVT (xor GPR:$rs1, invLogicImm:$rs2)), (XNOR GPR:$rs1, invLogicImm } // Predicates = [HasStdExtZbbOrZbkb] let Predicates = [HasStdExtZbbOrZbkb] in { -def : PatGprGpr<shiftop<rotl>, ROL>; -def : PatGprGpr<shiftop<rotr>, ROR>; +def : PatGprShiftMaskXLen<rotl, ROL>; +def : PatGprShiftMaskXLen<rotr, ROR>; def : PatGprImm<rotr, RORI, uimmlog2xlen>; // There's no encoding for roli in the the 'B' extension as it can be @@ -517,29 +517,29 @@ def : Pat<(XLenVT (rotl GPR:$rs1, uimmlog2xlen:$shamt)), } // Predicates = [HasStdExtZbbOrZbkb] let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in { -def : PatGprGpr<shiftopw<riscv_rolw>, ROLW>; -def : PatGprGpr<shiftopw<riscv_rorw>, RORW>; +def : PatGprShiftMask32<riscv_rolw, ROLW>; +def : PatGprShiftMask32<riscv_rorw, RORW>; def : PatGprImm<riscv_rorw, RORIW, uimm5>; def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2), (RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>; } // Predicates = [HasStdExtZbbOrZbkb, IsRV64] let Predicates = [HasStdExtZbs] in { -def : Pat<(XLenVT (and (not (shiftop<shl> 1, (XLenVT GPR:$rs2))), GPR:$rs1)), - (BCLR GPR:$rs1, GPR:$rs2)>; +def : Pat<(XLenVT (and (not (shl 1, shiftMaskXLen:$rs2)), GPR:$rs1)), + (BCLR GPR:$rs1, shiftMaskXLen:$rs2)>; def : Pat<(XLenVT (and (rotl -2, (XLenVT GPR:$rs2)), GPR:$rs1)), (BCLR GPR:$rs1, GPR:$rs2)>; -def : Pat<(XLenVT (or (shiftop<shl> 1, (XLenVT GPR:$rs2)), GPR:$rs1)), - (BSET GPR:$rs1, GPR:$rs2)>; -def : Pat<(XLenVT (xor (shiftop<shl> 1, (XLenVT GPR:$rs2)), GPR:$rs1)), - (BINV GPR:$rs1, GPR:$rs2)>; -def : Pat<(XLenVT (and (shiftop<srl> GPR:$rs1, (XLenVT GPR:$rs2)), 1)), - (BEXT GPR:$rs1, GPR:$rs2)>; - -def : Pat<(XLenVT (shiftop<shl> 1, (XLenVT GPR:$rs2))), - (BSET (XLenVT X0), GPR:$rs2)>; -def : Pat<(XLenVT (not (shiftop<shl> -1, (XLenVT GPR:$rs2)))), - (ADDI (XLenVT (BSET (XLenVT X0), GPR:$rs2)), -1)>; +def : Pat<(XLenVT (or (shl 1, shiftMaskXLen:$rs2), GPR:$rs1)), + (BSET GPR:$rs1, shiftMaskXLen:$rs2)>; +def : Pat<(XLenVT (xor (shl 1, shiftMaskXLen:$rs2), GPR:$rs1)), + (BINV GPR:$rs1, shiftMaskXLen:$rs2)>; +def : Pat<(XLenVT (and (srl GPR:$rs1, shiftMaskXLen:$rs2), 1)), + (BEXT GPR:$rs1, shiftMaskXLen:$rs2)>; + +def : Pat<(XLenVT (shl 1, shiftMaskXLen:$rs2)), + (BSET (XLenVT X0), shiftMaskXLen:$rs2)>; +def : Pat<(XLenVT (not (shl -1, shiftMaskXLen:$rs2))), + (ADDI (XLenVT (BSET (XLenVT X0), shiftMaskXLen:$rs2)), -1)>; def : Pat<(XLenVT (and GPR:$rs1, BCLRMask:$mask)), (BCLRI GPR:$rs1, BCLRMask:$mask)>; diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index dbe8e18..d91923b 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -507,7 +507,9 @@ static Register buildLoadInst(SPIRVType *BaseType, Register PtrRegister, static Register buildBuiltinVariableLoad( MachineIRBuilder &MIRBuilder, SPIRVType *VariableType, SPIRVGlobalRegistry *GR, SPIRV::BuiltIn::BuiltIn BuiltinValue, LLT LLType, - Register Reg = Register(0), bool isConst = true, bool hasLinkageTy = true) { + Register Reg = Register(0), bool isConst = true, + const std::optional<SPIRV::LinkageType::LinkageType> &LinkageTy = { + SPIRV::LinkageType::Import}) { Register NewRegister = MIRBuilder.getMRI()->createVirtualRegister(&SPIRV::pIDRegClass); MIRBuilder.getMRI()->setType( @@ -521,9 +523,8 @@ static Register buildBuiltinVariableLoad( // Set up the global OpVariable with the necessary builtin decorations. Register Variable = GR->buildGlobalVariable( NewRegister, PtrType, getLinkStringForBuiltIn(BuiltinValue), nullptr, - SPIRV::StorageClass::Input, nullptr, /* isConst= */ isConst, - /* HasLinkageTy */ hasLinkageTy, SPIRV::LinkageType::Import, MIRBuilder, - false); + SPIRV::StorageClass::Input, nullptr, /* isConst= */ isConst, LinkageTy, + MIRBuilder, false); // Load the value from the global variable. Register LoadedRegister = @@ -1851,7 +1852,7 @@ static bool generateWaveInst(const SPIRV::IncomingCall *Call, return buildBuiltinVariableLoad( MIRBuilder, Call->ReturnType, GR, Value, LLType, Call->ReturnRegister, - /* isConst= */ false, /* hasLinkageTy= */ false); + /* isConst= */ false, /* LinkageType= */ std::nullopt); } // We expect a builtin diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp index 1a7c02c..9e11c3a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp @@ -479,19 +479,9 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, .addImm(static_cast<uint32_t>(getExecutionModel(*ST, F))) .addUse(FuncVReg); addStringImm(F.getName(), MIB); - } else if (F.getLinkage() != GlobalValue::InternalLinkage && - F.getLinkage() != GlobalValue::PrivateLinkage && - F.getVisibility() != GlobalValue::HiddenVisibility) { - SPIRV::LinkageType::LinkageType LnkTy = - F.isDeclaration() - ? SPIRV::LinkageType::Import - : (F.getLinkage() == GlobalValue::LinkOnceODRLinkage && - ST->canUseExtension( - SPIRV::Extension::SPV_KHR_linkonce_odr) - ? SPIRV::LinkageType::LinkOnceODR - : SPIRV::LinkageType::Export); + } else if (const auto LnkTy = getSpirvLinkageTypeFor(*ST, F)) { buildOpDecorate(FuncVReg, MIRBuilder, SPIRV::Decoration::LinkageAttributes, - {static_cast<uint32_t>(LnkTy)}, F.getName()); + {static_cast<uint32_t>(*LnkTy)}, F.getName()); } // Handle function pointers decoration diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index 6fd1c7e..6181abb 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -712,9 +712,9 @@ SPIRVGlobalRegistry::buildConstantSampler(Register ResReg, unsigned AddrMode, Register SPIRVGlobalRegistry::buildGlobalVariable( Register ResVReg, SPIRVType *BaseType, StringRef Name, const GlobalValue *GV, SPIRV::StorageClass::StorageClass Storage, - const MachineInstr *Init, bool IsConst, bool HasLinkageTy, - SPIRV::LinkageType::LinkageType LinkageType, MachineIRBuilder &MIRBuilder, - bool IsInstSelector) { + const MachineInstr *Init, bool IsConst, + const std::optional<SPIRV::LinkageType::LinkageType> &LinkageType, + MachineIRBuilder &MIRBuilder, bool IsInstSelector) { const GlobalVariable *GVar = nullptr; if (GV) { GVar = cast<const GlobalVariable>(GV); @@ -792,9 +792,9 @@ Register SPIRVGlobalRegistry::buildGlobalVariable( buildOpDecorate(Reg, MIRBuilder, SPIRV::Decoration::Alignment, {Alignment}); } - if (HasLinkageTy) + if (LinkageType) buildOpDecorate(Reg, MIRBuilder, SPIRV::Decoration::LinkageAttributes, - {static_cast<uint32_t>(LinkageType)}, Name); + {static_cast<uint32_t>(*LinkageType)}, Name); SPIRV::BuiltIn::BuiltIn BuiltInId; if (getSpirvBuiltInIdByName(Name, BuiltInId)) @@ -821,8 +821,8 @@ Register SPIRVGlobalRegistry::getOrCreateGlobalVariableWithBinding( MIRBuilder.getMRI()->createVirtualRegister(&SPIRV::iIDRegClass); buildGlobalVariable(VarReg, VarType, Name, nullptr, - getPointerStorageClass(VarType), nullptr, false, false, - SPIRV::LinkageType::Import, MIRBuilder, false); + getPointerStorageClass(VarType), nullptr, false, + std::nullopt, MIRBuilder, false); buildOpDecorate(VarReg, MIRBuilder, SPIRV::Decoration::DescriptorSet, {Set}); buildOpDecorate(VarReg, MIRBuilder, SPIRV::Decoration::Binding, {Binding}); diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h index a648def..c230e62 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h @@ -548,14 +548,12 @@ public: MachineIRBuilder &MIRBuilder); Register getOrCreateUndef(MachineInstr &I, SPIRVType *SpvType, const SPIRVInstrInfo &TII); - Register buildGlobalVariable(Register Reg, SPIRVType *BaseType, - StringRef Name, const GlobalValue *GV, - SPIRV::StorageClass::StorageClass Storage, - const MachineInstr *Init, bool IsConst, - bool HasLinkageTy, - SPIRV::LinkageType::LinkageType LinkageType, - MachineIRBuilder &MIRBuilder, - bool IsInstSelector); + Register buildGlobalVariable( + Register Reg, SPIRVType *BaseType, StringRef Name, const GlobalValue *GV, + SPIRV::StorageClass::StorageClass Storage, const MachineInstr *Init, + bool IsConst, + const std::optional<SPIRV::LinkageType::LinkageType> &LinkageType, + MachineIRBuilder &MIRBuilder, bool IsInstSelector); Register getOrCreateGlobalVariableWithBinding(const SPIRVType *VarType, uint32_t Set, uint32_t Binding, StringRef Name, diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index a0cff4d..5591d9f 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -4350,15 +4350,8 @@ bool SPIRVInstructionSelector::selectGlobalValue( if (hasInitializer(GlobalVar) && !Init) return true; - bool HasLnkTy = !GV->hasInternalLinkage() && !GV->hasPrivateLinkage() && - !GV->hasHiddenVisibility(); - SPIRV::LinkageType::LinkageType LnkType = - GV->isDeclarationForLinker() - ? SPIRV::LinkageType::Import - : (GV->hasLinkOnceODRLinkage() && - STI.canUseExtension(SPIRV::Extension::SPV_KHR_linkonce_odr) - ? SPIRV::LinkageType::LinkOnceODR - : SPIRV::LinkageType::Export); + const std::optional<SPIRV::LinkageType::LinkageType> LnkType = + getSpirvLinkageTypeFor(STI, *GV); const unsigned AddrSpace = GV->getAddressSpace(); SPIRV::StorageClass::StorageClass StorageClass = @@ -4366,7 +4359,7 @@ bool SPIRVInstructionSelector::selectGlobalValue( SPIRVType *ResType = GR.getOrCreateSPIRVPointerType(GVType, I, StorageClass); Register Reg = GR.buildGlobalVariable( ResVReg, ResType, GlobalIdent, GV, StorageClass, Init, - GlobalVar->isConstant(), HasLnkTy, LnkType, MIRBuilder, true); + GlobalVar->isConstant(), LnkType, MIRBuilder, true); return Reg.isValid(); } @@ -4517,8 +4510,8 @@ bool SPIRVInstructionSelector::loadVec3BuiltinInputID( // builtin variable. Register Variable = GR.buildGlobalVariable( NewRegister, PtrType, getLinkStringForBuiltIn(BuiltInValue), nullptr, - SPIRV::StorageClass::Input, nullptr, true, false, - SPIRV::LinkageType::Import, MIRBuilder, false); + SPIRV::StorageClass::Input, nullptr, true, std::nullopt, MIRBuilder, + false); // Create new register for loading value. MachineRegisterInfo *MRI = MIRBuilder.getMRI(); @@ -4570,8 +4563,8 @@ bool SPIRVInstructionSelector::loadBuiltinInputID( // builtin variable. Register Variable = GR.buildGlobalVariable( NewRegister, PtrType, getLinkStringForBuiltIn(BuiltInValue), nullptr, - SPIRV::StorageClass::Input, nullptr, true, false, - SPIRV::LinkageType::Import, MIRBuilder, false); + SPIRV::StorageClass::Input, nullptr, true, std::nullopt, MIRBuilder, + false); // Load uint value from the global variable. auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpLoad)) diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index 61a0bbe..f7cdfcb 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -547,9 +547,9 @@ void SPIRVModuleAnalysis::collectFuncNames(MachineInstr &MI, if (MI.getOpcode() == SPIRV::OpDecorate) { // If it's got Import linkage. auto Dec = MI.getOperand(1).getImm(); - if (Dec == static_cast<unsigned>(SPIRV::Decoration::LinkageAttributes)) { + if (Dec == SPIRV::Decoration::LinkageAttributes) { auto Lnk = MI.getOperand(MI.getNumOperands() - 1).getImm(); - if (Lnk == static_cast<unsigned>(SPIRV::LinkageType::Import)) { + if (Lnk == SPIRV::LinkageType::Import) { // Map imported function name to function ID register. const Function *ImportedFunc = F->getParent()->getFunction(getStringImm(MI, 2)); @@ -635,7 +635,7 @@ static void collectOtherInstr(MachineInstr &MI, SPIRV::ModuleAnalysisInfo &MAI, void SPIRVModuleAnalysis::processOtherInstrs(const Module &M) { InstrTraces IS; for (auto F = M.begin(), E = M.end(); F != E; ++F) { - if ((*F).isDeclaration()) + if (F->isDeclaration()) continue; MachineFunction *MF = MMI->getMachineFunction(*F); assert(MF); diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h index d8376cd..2d19f6de 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h @@ -169,9 +169,7 @@ struct ModuleAnalysisInfo { MCRegister getFuncReg(const Function *F) { assert(F && "Function is null"); - auto FuncPtrRegPair = FuncMap.find(F); - return FuncPtrRegPair == FuncMap.end() ? MCRegister() - : FuncPtrRegPair->second; + return FuncMap.lookup(F); } MCRegister getExtInstSetReg(unsigned SetNum) { return ExtInstSetMap[SetNum]; } InstrList &getMSInstrs(unsigned MSType) { return MS[MSType]; } diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index 1d47c89..4e2cc88 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -1040,4 +1040,19 @@ getFirstValidInstructionInsertPoint(MachineBasicBlock &BB) { : VarPos; } +std::optional<SPIRV::LinkageType::LinkageType> +getSpirvLinkageTypeFor(const SPIRVSubtarget &ST, const GlobalValue &GV) { + if (GV.hasLocalLinkage() || GV.hasHiddenVisibility()) + return std::nullopt; + + if (GV.isDeclarationForLinker()) + return SPIRV::LinkageType::Import; + + if (GV.hasLinkOnceODRLinkage() && + ST.canUseExtension(SPIRV::Extension::SPV_KHR_linkonce_odr)) + return SPIRV::LinkageType::LinkOnceODR; + + return SPIRV::LinkageType::Export; +} + } // namespace llvm diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h index 5777a24..99d9d40 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.h +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h @@ -559,5 +559,8 @@ unsigned getArrayComponentCount(const MachineRegisterInfo *MRI, const MachineInstr *ResType); MachineBasicBlock::iterator getFirstValidInstructionInsertPoint(MachineBasicBlock &BB); + +std::optional<SPIRV::LinkageType::LinkageType> +getSpirvLinkageTypeFor(const SPIRVSubtarget &ST, const GlobalValue &GV); } // namespace llvm #endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 8e08d16..a1fd366 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1164,7 +1164,6 @@ def ProcessorFeatures { FeatureAVXNECONVERT, FeatureAVXVNNIINT8, FeatureAVXVNNIINT16, - FeatureUSERMSR, FeatureSHA512, FeatureSM3, FeatureEGPR, diff --git a/llvm/lib/TargetParser/ARMTargetParser.cpp b/llvm/lib/TargetParser/ARMTargetParser.cpp index 08944e6..7882045 100644 --- a/llvm/lib/TargetParser/ARMTargetParser.cpp +++ b/llvm/lib/TargetParser/ARMTargetParser.cpp @@ -235,16 +235,16 @@ ARM::NeonSupportLevel ARM::getFPUNeonSupportLevel(ARM::FPUKind FPUKind) { StringRef ARM::getFPUSynonym(StringRef FPU) { return StringSwitch<StringRef>(FPU) - .Cases("fpa", "fpe2", "fpe3", "maverick", "invalid") // Unsupported + .Cases({"fpa", "fpe2", "fpe3", "maverick"}, "invalid") // Unsupported .Case("vfp2", "vfpv2") .Case("vfp3", "vfpv3") .Case("vfp4", "vfpv4") .Case("vfp3-d16", "vfpv3-d16") .Case("vfp4-d16", "vfpv4-d16") - .Cases("fp4-sp-d16", "vfpv4-sp-d16", "fpv4-sp-d16") - .Cases("fp4-dp-d16", "fpv4-dp-d16", "vfpv4-d16") + .Cases({"fp4-sp-d16", "vfpv4-sp-d16"}, "fpv4-sp-d16") + .Cases({"fp4-dp-d16", "fpv4-dp-d16"}, "vfpv4-d16") .Case("fp5-sp-d16", "fpv5-sp-d16") - .Cases("fp5-dp-d16", "fpv5-dp-d16", "fpv5-d16") + .Cases({"fp5-dp-d16", "fpv5-dp-d16"}, "fpv5-d16") // FIXME: Clang uses it, but it's bogus, since neon defaults to vfpv3. .Case("neon-vfpv3", "neon") .Default(FPU); diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index dd13ce3..b13c795 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -143,8 +143,7 @@ constexpr FeatureBitset FeaturesDiamondRapids = FeatureAVXVNNIINT8 | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4 | FeatureEGPR | FeatureZU | FeatureCCMP | FeaturePush2Pop2 | FeaturePPX | FeatureNDD | FeatureNF | FeatureMOVRS | FeatureAMX_MOVRS | - FeatureAMX_AVX512 | FeatureAMX_FP8 | FeatureAMX_TF32 | - FeatureAMX_TRANSPOSE | FeatureUSERMSR; + FeatureAMX_AVX512 | FeatureAMX_FP8 | FeatureAMX_TF32 | FeatureAMX_TRANSPOSE; // Intel Atom processors. // Bonnell has feature parity with Core2 and adds MOVBE. diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp index b9534def..a06f832 100644 --- a/llvm/lib/Transforms/Scalar/GVNSink.cpp +++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp @@ -430,6 +430,7 @@ public: case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::PtrToInt: + case Instruction::PtrToAddr: case Instruction::IntToPtr: case Instruction::BitCast: case Instruction::AddrSpaceCast: diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp index fa66a03..23e1243 100644 --- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -227,6 +227,7 @@ static InstructionCost ComputeSpeculationCost(const Instruction *I, case Instruction::Call: case Instruction::BitCast: case Instruction::PtrToInt: + case Instruction::PtrToAddr: case Instruction::IntToPtr: case Instruction::AddrSpaceCast: case Instruction::FPToUI: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 048a3e6..3f18bd7 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10546,8 +10546,11 @@ static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL, PoisonValue::get(UniqueValues.front()->getType())); // Check that extended with poisons/copyable operations are still valid // for vectorization (div/rem are not allowed). - if (!S.areInstructionsWithCopyableElements() && - !getSameOpcode(PaddedUniqueValues, TLI).valid()) { + if ((!S.areInstructionsWithCopyableElements() && + !getSameOpcode(PaddedUniqueValues, TLI).valid()) || + (S.areInstructionsWithCopyableElements() && S.isMulDivLikeOp() && + (S.getMainOp()->isIntDivRem() || S.getMainOp()->isFPDivRem() || + isa<CallInst>(S.getMainOp())))) { LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); ReuseShuffleIndices.clear(); return false; diff --git a/llvm/lib/WindowsDriver/MSVCPaths.cpp b/llvm/lib/WindowsDriver/MSVCPaths.cpp index 1fc8974..09468da 100644 --- a/llvm/lib/WindowsDriver/MSVCPaths.cpp +++ b/llvm/lib/WindowsDriver/MSVCPaths.cpp @@ -259,9 +259,7 @@ static bool getSystemRegistryString(const char *keyPath, const char *valueName, #endif // _WIN32 } -namespace llvm { - -const char *archToWindowsSDKArch(Triple::ArchType Arch) { +const char *llvm::archToWindowsSDKArch(Triple::ArchType Arch) { switch (Arch) { case Triple::ArchType::x86: return "x86"; @@ -277,7 +275,7 @@ const char *archToWindowsSDKArch(Triple::ArchType Arch) { } } -const char *archToLegacyVCArch(Triple::ArchType Arch) { +const char *llvm::archToLegacyVCArch(Triple::ArchType Arch) { switch (Arch) { case Triple::ArchType::x86: // x86 is default in legacy VC toolchains. @@ -295,7 +293,7 @@ const char *archToLegacyVCArch(Triple::ArchType Arch) { } } -const char *archToDevDivInternalArch(Triple::ArchType Arch) { +const char *llvm::archToDevDivInternalArch(Triple::ArchType Arch) { switch (Arch) { case Triple::ArchType::x86: return "i386"; @@ -311,8 +309,9 @@ const char *archToDevDivInternalArch(Triple::ArchType Arch) { } } -bool appendArchToWindowsSDKLibPath(int SDKMajor, SmallString<128> LibPath, - Triple::ArchType Arch, std::string &path) { +bool llvm::appendArchToWindowsSDKLibPath(int SDKMajor, SmallString<128> LibPath, + Triple::ArchType Arch, + std::string &path) { if (SDKMajor >= 8) { sys::path::append(LibPath, archToWindowsSDKArch(Arch)); } else { @@ -336,10 +335,11 @@ bool appendArchToWindowsSDKLibPath(int SDKMajor, SmallString<128> LibPath, return true; } -std::string getSubDirectoryPath(SubDirectoryType Type, ToolsetLayout VSLayout, - const std::string &VCToolChainPath, - Triple::ArchType TargetArch, - StringRef SubdirParent) { +std::string llvm::getSubDirectoryPath(SubDirectoryType Type, + ToolsetLayout VSLayout, + const std::string &VCToolChainPath, + Triple::ArchType TargetArch, + StringRef SubdirParent) { const char *SubdirName; const char *IncludeName; switch (VSLayout) { @@ -390,19 +390,22 @@ std::string getSubDirectoryPath(SubDirectoryType Type, ToolsetLayout VSLayout, return std::string(Path); } -bool useUniversalCRT(ToolsetLayout VSLayout, const std::string &VCToolChainPath, - Triple::ArchType TargetArch, vfs::FileSystem &VFS) { +bool llvm::useUniversalCRT(ToolsetLayout VSLayout, + const std::string &VCToolChainPath, + Triple::ArchType TargetArch, vfs::FileSystem &VFS) { SmallString<128> TestPath(getSubDirectoryPath( SubDirectoryType::Include, VSLayout, VCToolChainPath, TargetArch)); sys::path::append(TestPath, "stdlib.h"); return !VFS.exists(TestPath); } -bool getWindowsSDKDir(vfs::FileSystem &VFS, std::optional<StringRef> WinSdkDir, - std::optional<StringRef> WinSdkVersion, - std::optional<StringRef> WinSysRoot, std::string &Path, - int &Major, std::string &WindowsSDKIncludeVersion, - std::string &WindowsSDKLibVersion) { +bool llvm::getWindowsSDKDir(vfs::FileSystem &VFS, + std::optional<StringRef> WinSdkDir, + std::optional<StringRef> WinSdkVersion, + std::optional<StringRef> WinSysRoot, + std::string &Path, int &Major, + std::string &WindowsSDKIncludeVersion, + std::string &WindowsSDKLibVersion) { // Trust /winsdkdir and /winsdkversion if present. if (getWindowsSDKDirViaCommandLine(VFS, WinSdkDir, WinSdkVersion, WinSysRoot, Path, Major, WindowsSDKIncludeVersion)) { @@ -460,11 +463,11 @@ bool getWindowsSDKDir(vfs::FileSystem &VFS, std::optional<StringRef> WinSdkDir, return false; } -bool getUniversalCRTSdkDir(vfs::FileSystem &VFS, - std::optional<StringRef> WinSdkDir, - std::optional<StringRef> WinSdkVersion, - std::optional<StringRef> WinSysRoot, - std::string &Path, std::string &UCRTVersion) { +bool llvm::getUniversalCRTSdkDir(vfs::FileSystem &VFS, + std::optional<StringRef> WinSdkDir, + std::optional<StringRef> WinSdkVersion, + std::optional<StringRef> WinSysRoot, + std::string &Path, std::string &UCRTVersion) { // If /winsdkdir is passed, use it as location for the UCRT too. // FIXME: Should there be a dedicated /ucrtdir to override /winsdkdir? int Major; @@ -491,11 +494,11 @@ bool getUniversalCRTSdkDir(vfs::FileSystem &VFS, return getWindows10SDKVersionFromPath(VFS, Path, UCRTVersion); } -bool findVCToolChainViaCommandLine(vfs::FileSystem &VFS, - std::optional<StringRef> VCToolsDir, - std::optional<StringRef> VCToolsVersion, - std::optional<StringRef> WinSysRoot, - std::string &Path, ToolsetLayout &VSLayout) { +bool llvm::findVCToolChainViaCommandLine( + vfs::FileSystem &VFS, std::optional<StringRef> VCToolsDir, + std::optional<StringRef> VCToolsVersion, + std::optional<StringRef> WinSysRoot, std::string &Path, + ToolsetLayout &VSLayout) { // Don't validate the input; trust the value supplied by the user. // The primary motivation is to prevent unnecessary file and registry access. if (VCToolsDir || WinSysRoot) { @@ -518,8 +521,9 @@ bool findVCToolChainViaCommandLine(vfs::FileSystem &VFS, return false; } -bool findVCToolChainViaEnvironment(vfs::FileSystem &VFS, std::string &Path, - ToolsetLayout &VSLayout) { +bool llvm::findVCToolChainViaEnvironment(vfs::FileSystem &VFS, + std::string &Path, + ToolsetLayout &VSLayout) { // These variables are typically set by vcvarsall.bat // when launching a developer command prompt. if (std::optional<std::string> VCToolsInstallDir = @@ -627,9 +631,9 @@ bool findVCToolChainViaEnvironment(vfs::FileSystem &VFS, std::string &Path, return false; } -bool findVCToolChainViaSetupConfig(vfs::FileSystem &VFS, - std::optional<StringRef> VCToolsVersion, - std::string &Path, ToolsetLayout &VSLayout) { +bool llvm::findVCToolChainViaSetupConfig( + vfs::FileSystem &VFS, std::optional<StringRef> VCToolsVersion, + std::string &Path, ToolsetLayout &VSLayout) { #if !defined(USE_MSVC_SETUP_API) return false; #else @@ -724,7 +728,8 @@ bool findVCToolChainViaSetupConfig(vfs::FileSystem &VFS, #endif } -bool findVCToolChainViaRegistry(std::string &Path, ToolsetLayout &VSLayout) { +bool llvm::findVCToolChainViaRegistry(std::string &Path, + ToolsetLayout &VSLayout) { std::string VSInstallPath; if (getSystemRegistryString(R"(SOFTWARE\Microsoft\VisualStudio\$VERSION)", "InstallDir", VSInstallPath, nullptr) || @@ -744,5 +749,3 @@ bool findVCToolChainViaRegistry(std::string &Path, ToolsetLayout &VSLayout) { } return false; } - -} // namespace llvm diff --git a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info.ll index 3c3748a..1964fca 100644 --- a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info.ll @@ -104,3 +104,56 @@ exit: } declare void @clobber() + + +declare void @clobber.i32(i32) + +define void @test_guards_across_loops(i32 %N) { +; CHECK-LABEL: 'test_guards_across_loops' +; CHECK-NEXT: Classifying expressions for: @test_guards_across_loops +; CHECK-NEXT: %iv.1 = phi i32 [ 0, %entry ], [ %iv.1.next, %loop.1 ] +; CHECK-NEXT: --> {0,+,1}<%loop.1> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop.1: Computable } +; CHECK-NEXT: %iv.1.next = add i32 %iv.1, 1 +; CHECK-NEXT: --> {1,+,1}<%loop.1> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop.1: Computable } +; CHECK-NEXT: %iv.2 = phi i32 [ 0, %loop.1 ], [ %iv.2.next, %loop.2 ] +; CHECK-NEXT: --> {0,+,1}<nuw><%loop.2> U: full-set S: full-set Exits: (1 + %N) LoopDispositions: { %loop.2: Computable } +; CHECK-NEXT: %iv.2.next = add i32 %iv.2, 1 +; CHECK-NEXT: --> {1,+,1}<nw><%loop.2> U: full-set S: full-set Exits: (2 + %N) LoopDispositions: { %loop.2: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_guards_across_loops +; CHECK-NEXT: Loop %loop.2: backedge-taken count is (1 + (zext i32 %N to i64))<nuw><nsw> +; CHECK-NEXT: Loop %loop.2: constant max backedge-taken count is i64 4294967296 +; CHECK-NEXT: Loop %loop.2: symbolic max backedge-taken count is (1 + (zext i32 %N to i64))<nuw><nsw> +; CHECK-NEXT: Loop %loop.2: Trip multiple is 1 +; CHECK-NEXT: Loop %loop.1: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %loop.1: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %loop.1: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %loop.1: Predicated backedge-taken count is (1 + (zext i32 %N to i64))<nuw><nsw> +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,1}<%loop.1> Added Flags: <nusw> +; CHECK-NEXT: Loop %loop.1: Predicated constant max backedge-taken count is i64 4294967296 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,1}<%loop.1> Added Flags: <nusw> +; CHECK-NEXT: Loop %loop.1: Predicated symbolic max backedge-taken count is (1 + (zext i32 %N to i64))<nuw><nsw> +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,1}<%loop.1> Added Flags: <nusw> +; +entry: + br label %loop.1 + +loop.1: + %iv.1 = phi i32 [ 0, %entry ], [ %iv.1.next, %loop.1 ] + call void @clobber.i32(i32 %iv.1) + %ec.1 = icmp ugt i32 %iv.1, %N + %iv.1.next = add i32 %iv.1, 1 + br i1 %ec.1, label %loop.2, label %loop.1 + +loop.2: + %iv.2 = phi i32 [ 0, %loop.1 ], [ %iv.2.next, %loop.2 ] + call void @clobber.i32(i32 %iv.2) + %ec.2 = icmp ugt i32 %iv.2, %N + %iv.2.next = add i32 %iv.2, 1 + br i1 %ec.2, label %exit, label %loop.2 + +exit: + ret void +} diff --git a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll index a477465c..f35c48b 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll @@ -717,5 +717,46 @@ exit: ret void } +define void @test_urem_non_constant(ptr %dst, i32 %a, i32 %b) { +; CHECK-LABEL: 'test_urem_non_constant' +; CHECK-NEXT: Classifying expressions for: @test_urem_non_constant +; CHECK-NEXT: %rem = urem i32 %a, %b +; CHECK-NEXT: --> ((-1 * (%a /u %b) * %b) + %a) U: full-set S: full-set +; CHECK-NEXT: %and.0 = and i1 %pre.0, %pre.1 +; CHECK-NEXT: --> (%pre.1 umin %pre.0) U: full-set S: full-set +; CHECK-NEXT: %and.1 = and i1 %and.0, %pre.2 +; CHECK-NEXT: --> (%pre.1 umin %pre.2 umin %pre.0) U: full-set S: full-set +; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {0,+,%b}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %gep.dst = getelementptr inbounds i8, ptr %dst, i32 %b +; CHECK-NEXT: --> ((sext i32 %b to i64) + %dst) U: full-set S: full-set Exits: ((sext i32 %b to i64) + %dst) LoopDispositions: { %loop: Invariant } +; CHECK-NEXT: %iv.next = add i32 %iv, %b +; CHECK-NEXT: --> {%b,+,%b}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_urem_non_constant +; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. +; +entry: + %rem = urem i32 %a, %b + %pre.0 = icmp eq i32 %rem, 0 + %pre.1 = icmp ne i32 %a, 0 + %pre.2 = icmp ne i32 %b, 0 + %and.0 = and i1 %pre.0, %pre.1 + %and.1 = and i1 %and.0, %pre.2 + br i1 %and.1, label %loop, label %exit + +loop: + %iv = phi i32 [ 0, %entry], [ %iv.next, %loop ] + %gep.dst = getelementptr inbounds i8, ptr %dst, i32 %b + store i8 0, ptr %gep.dst + %iv.next = add i32 %iv, %b + %ec = icmp ne i32 %iv.next, %a + br i1 %ec, label %loop, label %exit + +exit: + ret void +} + declare void @llvm.assume(i1) declare void @llvm.experimental.guard(i1, ...) diff --git a/llvm/test/CodeGen/AArch64/arm64-copy-phys-zero-reg.mir b/llvm/test/CodeGen/AArch64/arm64-copy-phys-zero-reg.mir index 284d624..f34d3ed 100644 --- a/llvm/test/CodeGen/AArch64/arm64-copy-phys-zero-reg.mir +++ b/llvm/test/CodeGen/AArch64/arm64-copy-phys-zero-reg.mir @@ -1,16 +1,12 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 -# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="-zcm-gpr32,-zcm-gpr64,-zcz-gpr32,-zcz-gpr64" %s \ -# RUN: | FileCheck --check-prefix=CHECK-NO-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ %s -# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="+zcm-gpr32,-zcm-gpr64,-zcz-gpr32,-zcz-gpr64" %s \ -# RUN: | FileCheck --check-prefix=CHECK-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ %s -# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="-zcm-gpr32,+zcm-gpr64,-zcz-gpr32,-zcz-gpr64" %s \ -# RUN: | FileCheck --check-prefix=CHECK-NO-ZCM-GPR32-ZCM-GPR64-NO-ZCZ %s -# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="+zcm-gpr32,+zcm-gpr64,-zcz-gpr32,-zcz-gpr64" %s \ -# RUN: | FileCheck --check-prefix=CHECK-ZCM-GPR32-ZCM-GPR64-NO-ZCZ %s -# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="-zcm-gpr32,-zcm-gpr64,+zcz-gpr32,+zcz-gpr64" %s \ -# RUN: | FileCheck --check-prefix=CHECK-NO-ZCM-ZCZ %s -# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="+zcm-gpr32,+zcm-gpr64,+zcz-gpr32,+zcz-gpr64" %s \ -# RUN: | FileCheck --check-prefix=CHECK-ZCM-ZCZ %s +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="-zcz-gpr32,-zcz-gpr64" %s \ +# RUN: | FileCheck --check-prefix=CHECK-NOZCZ-GPR32-NOZCZ-GPR64 %s +# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="+zcz-gpr32,-zcz-gpr64" %s \ +# RUN: | FileCheck --check-prefix=CHECK-ZCZ-GPR32-NOZCZ-GPR64 %s +# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="-zcz-gpr32,+zcz-gpr64" %s \ +# RUN: | FileCheck --check-prefix=CHECK-NOZCZ-GPR32-ZCZ-GPR64 %s +# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="+zcz-gpr32,+zcz-gpr64" %s \ +# RUN: | FileCheck --check-prefix=CHECK-ZCZ-GPR32-ZCZ-GPR64 %s --- | define void @f0(i64 noundef %x) { ret void } @@ -24,41 +20,29 @@ liveins: body: | bb.0: liveins: $x0, $lr - ; CHECK-NO-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ-LABEL: name: f0 - ; CHECK-NO-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ: liveins: $x0, $lr - ; CHECK-NO-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ-NEXT: {{ $}} - ; CHECK-NO-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ-NEXT: $w0 = ORRWrr $wzr, $wzr - ; CHECK-NO-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + ; CHECK-NOZCZ-GPR32-NOZCZ-GPR64-LABEL: name: f0 + ; CHECK-NOZCZ-GPR32-NOZCZ-GPR64: liveins: $x0, $lr + ; CHECK-NOZCZ-GPR32-NOZCZ-GPR64-NEXT: {{ $}} + ; CHECK-NOZCZ-GPR32-NOZCZ-GPR64-NEXT: $w0 = ORRWrr $wzr, $wzr + ; CHECK-NOZCZ-GPR32-NOZCZ-GPR64-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 ; - ; CHECK-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ-LABEL: name: f0 - ; CHECK-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ: liveins: $x0, $lr - ; CHECK-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ-NEXT: {{ $}} - ; CHECK-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ-NEXT: $w0 = ORRWrr $wzr, $wzr - ; CHECK-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + ; CHECK-ZCZ-GPR32-NOZCZ-GPR64-LABEL: name: f0 + ; CHECK-ZCZ-GPR32-NOZCZ-GPR64: liveins: $x0, $lr + ; CHECK-ZCZ-GPR32-NOZCZ-GPR64-NEXT: {{ $}} + ; CHECK-ZCZ-GPR32-NOZCZ-GPR64-NEXT: $w0 = MOVZWi 0, 0 + ; CHECK-ZCZ-GPR32-NOZCZ-GPR64-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 ; - ; CHECK-NO-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-LABEL: name: f0 - ; CHECK-NO-ZCM-GPR32-ZCM-GPR64-NO-ZCZ: liveins: $x0, $lr - ; CHECK-NO-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-NEXT: {{ $}} - ; CHECK-NO-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-NEXT: $x0 = ORRXrr $xzr, undef $xzr, implicit $wzr - ; CHECK-NO-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + ; CHECK-NOZCZ-GPR32-ZCZ-GPR64-LABEL: name: f0 + ; CHECK-NOZCZ-GPR32-ZCZ-GPR64: liveins: $x0, $lr + ; CHECK-NOZCZ-GPR32-ZCZ-GPR64-NEXT: {{ $}} + ; CHECK-NOZCZ-GPR32-ZCZ-GPR64-NEXT: $w0 = ORRWrr $wzr, $wzr + ; CHECK-NOZCZ-GPR32-ZCZ-GPR64-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 ; - ; CHECK-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-LABEL: name: f0 - ; CHECK-ZCM-GPR32-ZCM-GPR64-NO-ZCZ: liveins: $x0, $lr - ; CHECK-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-NEXT: {{ $}} - ; CHECK-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-NEXT: $w0 = ORRWrr $wzr, $wzr - ; CHECK-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 - ; - ; CHECK-NO-ZCM-ZCZ-LABEL: name: f0 - ; CHECK-NO-ZCM-ZCZ: liveins: $x0, $lr - ; CHECK-NO-ZCM-ZCZ-NEXT: {{ $}} - ; CHECK-NO-ZCM-ZCZ-NEXT: $w0 = MOVZWi 0, 0 - ; CHECK-NO-ZCM-ZCZ-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 - ; - ; CHECK-ZCM-ZCZ-LABEL: name: f0 - ; CHECK-ZCM-ZCZ: liveins: $x0, $lr - ; CHECK-ZCM-ZCZ-NEXT: {{ $}} - ; CHECK-ZCM-ZCZ-NEXT: $w0 = MOVZWi 0, 0 - ; CHECK-ZCM-ZCZ-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + ; CHECK-ZCZ-GPR32-ZCZ-GPR64-LABEL: name: f0 + ; CHECK-ZCZ-GPR32-ZCZ-GPR64: liveins: $x0, $lr + ; CHECK-ZCZ-GPR32-ZCZ-GPR64-NEXT: {{ $}} + ; CHECK-ZCZ-GPR32-ZCZ-GPR64-NEXT: $w0 = MOVZWi 0, 0 + ; CHECK-ZCZ-GPR32-ZCZ-GPR64-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 $w0 = COPY $wzr BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 ... @@ -69,41 +53,29 @@ liveins: body: | bb.0: liveins: $x0, $lr - ; CHECK-NO-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ-LABEL: name: f1 - ; CHECK-NO-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ: liveins: $x0, $lr - ; CHECK-NO-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ-NEXT: {{ $}} - ; CHECK-NO-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ-NEXT: $x0 = ORRXrr $xzr, $xzr - ; CHECK-NO-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 - ; - ; CHECK-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ-LABEL: name: f1 - ; CHECK-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ: liveins: $x0, $lr - ; CHECK-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ-NEXT: {{ $}} - ; CHECK-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ-NEXT: $x0 = ORRXrr $xzr, $xzr - ; CHECK-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 - ; - ; CHECK-NO-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-LABEL: name: f1 - ; CHECK-NO-ZCM-GPR32-ZCM-GPR64-NO-ZCZ: liveins: $x0, $lr - ; CHECK-NO-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-NEXT: {{ $}} - ; CHECK-NO-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-NEXT: $x0 = ORRXrr $xzr, $xzr - ; CHECK-NO-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + ; CHECK-NOZCZ-GPR32-NOZCZ-GPR64-LABEL: name: f1 + ; CHECK-NOZCZ-GPR32-NOZCZ-GPR64: liveins: $x0, $lr + ; CHECK-NOZCZ-GPR32-NOZCZ-GPR64-NEXT: {{ $}} + ; CHECK-NOZCZ-GPR32-NOZCZ-GPR64-NEXT: $x0 = ORRXrr $xzr, $xzr + ; CHECK-NOZCZ-GPR32-NOZCZ-GPR64-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 ; - ; CHECK-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-LABEL: name: f1 - ; CHECK-ZCM-GPR32-ZCM-GPR64-NO-ZCZ: liveins: $x0, $lr - ; CHECK-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-NEXT: {{ $}} - ; CHECK-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-NEXT: $x0 = ORRXrr $xzr, $xzr - ; CHECK-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + ; CHECK-ZCZ-GPR32-NOZCZ-GPR64-LABEL: name: f1 + ; CHECK-ZCZ-GPR32-NOZCZ-GPR64: liveins: $x0, $lr + ; CHECK-ZCZ-GPR32-NOZCZ-GPR64-NEXT: {{ $}} + ; CHECK-ZCZ-GPR32-NOZCZ-GPR64-NEXT: $x0 = ORRXrr $xzr, $xzr + ; CHECK-ZCZ-GPR32-NOZCZ-GPR64-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 ; - ; CHECK-NO-ZCM-ZCZ-LABEL: name: f1 - ; CHECK-NO-ZCM-ZCZ: liveins: $x0, $lr - ; CHECK-NO-ZCM-ZCZ-NEXT: {{ $}} - ; CHECK-NO-ZCM-ZCZ-NEXT: $x0 = MOVZXi 0, 0 - ; CHECK-NO-ZCM-ZCZ-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + ; CHECK-NOZCZ-GPR32-ZCZ-GPR64-LABEL: name: f1 + ; CHECK-NOZCZ-GPR32-ZCZ-GPR64: liveins: $x0, $lr + ; CHECK-NOZCZ-GPR32-ZCZ-GPR64-NEXT: {{ $}} + ; CHECK-NOZCZ-GPR32-ZCZ-GPR64-NEXT: $x0 = MOVZXi 0, 0 + ; CHECK-NOZCZ-GPR32-ZCZ-GPR64-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 ; - ; CHECK-ZCM-ZCZ-LABEL: name: f1 - ; CHECK-ZCM-ZCZ: liveins: $x0, $lr - ; CHECK-ZCM-ZCZ-NEXT: {{ $}} - ; CHECK-ZCM-ZCZ-NEXT: $x0 = MOVZXi 0, 0 - ; CHECK-ZCM-ZCZ-NEXT:BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + ; CHECK-ZCZ-GPR32-ZCZ-GPR64-LABEL: name: f1 + ; CHECK-ZCZ-GPR32-ZCZ-GPR64: liveins: $x0, $lr + ; CHECK-ZCZ-GPR32-ZCZ-GPR64-NEXT: {{ $}} + ; CHECK-ZCZ-GPR32-ZCZ-GPR64-NEXT: $x0 = MOVZXi 0, 0 + ; CHECK-ZCZ-GPR32-ZCZ-GPR64-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 $x0 = COPY $xzr BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 ... diff --git a/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll b/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll index 97a7741..849323f 100644 --- a/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll @@ -421,3 +421,83 @@ for.body51: ; preds = %is_sbox.exit155 unreachable } declare fastcc void @get_switch_type(i32, i32, i16 signext, i16 signext, ptr nocapture) nounwind ssp + +; CHECK-LABEL: fold_imm1_csinc_32: +; CHECK: cmp w0, w1 +; CHECK-NEXT: csinc w0, w2, wzr, ge +; CHECK-NEXT: ret +define i32 @fold_imm1_csinc_32(i32 %x, i32 %y, i32 %n) nounwind ssp { +entry: + %cmp = icmp slt i32 %x, %y + br i1 %cmp, label %if.then, label %if.else + +if.then: + br label %exit + +if.else: + br label %exit + +exit: + %result = phi i32 [ 1, %if.then ], [ %n, %if.else ] + ret i32 %result +} + +; CHECK-LABEL: fold_imm1_csinc_64: +; CHECK: cmp x0, x1 +; CHECK-NEXT: csinc x0, x2, xzr, ge +; CHECK-NEXT: ret +define i64 @fold_imm1_csinc_64(i64 %x, i64 %y, i64 %n) nounwind ssp { +entry: + %cmp = icmp slt i64 %x, %y + br i1 %cmp, label %if.then, label %if.else + +if.then: + br label %exit + +if.else: + br label %exit + +exit: + %result = phi i64 [ 1, %if.then ], [ %n, %if.else ] + ret i64 %result +} + +; CHECK-LABEL: fold_imm1_cset_32: +; CHECK: cmp w0, w1 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ret +define i32 @fold_imm1_cset_32(i32 %x, i32 %y) nounwind ssp { +entry: + %cmp = icmp slt i32 %x, %y + br i1 %cmp, label %if.then, label %if.else + +if.then: + br label %exit + +if.else: + br label %exit + +exit: + %result = phi i32 [ 1, %if.then ], [ 0, %if.else ] + ret i32 %result +} + +; CHECK-LABEL: fold_imm1_cset_64: +; CHECK: cmp x0, x1 +; CHECK-NEXT: cset x0, lt +; CHECK-NEXT: ret +define i64 @fold_imm1_cset_64(i64 %x, i64 %y) nounwind ssp { +entry: + %cmp = icmp slt i64 %x, %y + br i1 %cmp, label %if.then, label %if.else + +if.then: + br label %exit + +if.else: + br label %exit + +exit: + %result = phi i64 [ 1, %if.then ], [ 0, %if.else ] + ret i64 %result +} diff --git a/llvm/test/CodeGen/AArch64/peephole-csel.ll b/llvm/test/CodeGen/AArch64/peephole-csel.ll index 868b9f1..b0852580 100644 --- a/llvm/test/CodeGen/AArch64/peephole-csel.ll +++ b/llvm/test/CodeGen/AArch64/peephole-csel.ll @@ -5,10 +5,9 @@ define void @peephole_csel(ptr %dst, i1 %0, i1 %cmp) { ; CHECK-LABEL: peephole_csel: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: tst w2, #0x1 -; CHECK-NEXT: mov w8, #1 // =0x1 -; CHECK-NEXT: mov x9, xzr +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: tst w1, #0x1 -; CHECK-NEXT: csel x8, x8, x9, eq +; CHECK-NEXT: csinc x8, x8, xzr, ne ; CHECK-NEXT: str x8, [x0] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/rax1.ll b/llvm/test/CodeGen/AArch64/rax1.ll index f679e90..4bb551f 100644 --- a/llvm/test/CodeGen/AArch64/rax1.ll +++ b/llvm/test/CodeGen/AArch64/rax1.ll @@ -1,22 +1,44 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=aarch64 -mattr=+sha3 < %s | FileCheck --check-prefix=SHA3 %s -; RUN: llc -mtriple=aarch64 -mattr=-sha3 < %s | FileCheck --check-prefix=NOSHA3 %s +; RUN: llc -mtriple=aarch64 -mattr=+sha3 < %s | FileCheck --check-prefixes=SHA3,SHA3-SD %s +; RUN: llc -mtriple=aarch64 -mattr=-sha3 < %s | FileCheck --check-prefixes=NOSHA3,NOSHA3-SD %s +; RUN: llc -mtriple=aarch64 -mattr=+sha3 < %s -global-isel | FileCheck --check-prefixes=SHA3,SHA3-GI %s +; RUN: llc -mtriple=aarch64 -mattr=-sha3 < %s -global-isel | FileCheck --check-prefixes=NOSHA3,NOSHA3-GI %s define <2 x i64> @rax1(<2 x i64> %x, <2 x i64> %y) { -; SHA3-LABEL: rax1: -; SHA3: // %bb.0: -; SHA3-NEXT: rax1 v0.2d, v0.2d, v1.2d -; SHA3-NEXT: ret +; SHA3-SD-LABEL: rax1: +; SHA3-SD: // %bb.0: +; SHA3-SD-NEXT: rax1 v0.2d, v0.2d, v1.2d +; SHA3-SD-NEXT: ret ; -; NOSHA3-LABEL: rax1: -; NOSHA3: // %bb.0: -; NOSHA3-NEXT: add v2.2d, v1.2d, v1.2d -; NOSHA3-NEXT: usra v2.2d, v1.2d, #63 -; NOSHA3-NEXT: eor v0.16b, v0.16b, v2.16b -; NOSHA3-NEXT: ret +; NOSHA3-SD-LABEL: rax1: +; NOSHA3-SD: // %bb.0: +; NOSHA3-SD-NEXT: add v2.2d, v1.2d, v1.2d +; NOSHA3-SD-NEXT: usra v2.2d, v1.2d, #63 +; NOSHA3-SD-NEXT: eor v0.16b, v0.16b, v2.16b +; NOSHA3-SD-NEXT: ret +; +; SHA3-GI-LABEL: rax1: +; SHA3-GI: // %bb.0: +; SHA3-GI-NEXT: shl v2.2d, v1.2d, #1 +; SHA3-GI-NEXT: ushr v1.2d, v1.2d, #63 +; SHA3-GI-NEXT: orr v1.16b, v2.16b, v1.16b +; SHA3-GI-NEXT: eor v0.16b, v0.16b, v1.16b +; SHA3-GI-NEXT: ret +; +; NOSHA3-GI-LABEL: rax1: +; NOSHA3-GI: // %bb.0: +; NOSHA3-GI-NEXT: shl v2.2d, v1.2d, #1 +; NOSHA3-GI-NEXT: ushr v1.2d, v1.2d, #63 +; NOSHA3-GI-NEXT: orr v1.16b, v2.16b, v1.16b +; NOSHA3-GI-NEXT: eor v0.16b, v0.16b, v1.16b +; NOSHA3-GI-NEXT: ret %a = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %y, <2 x i64> %y, <2 x i64> <i64 1, i64 1>) %b = xor <2 x i64> %x, %a ret <2 x i64> %b } declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; NOSHA3: {{.*}} +; SHA3: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll index 53b2542..142290a 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll @@ -3611,10 +3611,10 @@ define <6 x i8> @volatile_load_v6i8(ptr addrspace(8) inreg %buf) { ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 glc +; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: buffer_load_ushort v6, off, s[16:19], 0 offset:4 glc -; SDAG-NEXT: s_waitcnt vmcnt(1) -; SDAG-NEXT: v_lshrrev_b32_e32 v7, 8, v0 ; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: v_lshrrev_b32_e32 v7, 8, v0 ; SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v6 ; SDAG-NEXT: v_lshrrev_b64 v[3:4], 24, v[0:1] ; SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -3627,12 +3627,12 @@ define <6 x i8> @volatile_load_v6i8(ptr addrspace(8) inreg %buf) { ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 glc +; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4 glc -; GISEL-NEXT: s_waitcnt vmcnt(1) +; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4 ; GISEL-NEXT: s_setpc_b64 s[30:31] %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -3652,6 +3652,7 @@ define void @volatile_store_v6i8(<6 x i8> %data, ptr addrspace(8) inreg %buf) { ; SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; SDAG-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 +; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: buffer_store_short v4, off, s[16:19], 0 offset:4 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_setpc_b64 s[30:31] @@ -3671,6 +3672,7 @@ define void @volatile_store_v6i8(<6 x i8> %data, ptr addrspace(8) inreg %buf) { ; GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; GISEL-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 +; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: buffer_store_short v2, off, s[16:19], 0 offset:4 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.to.lds.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.to.lds.ll index 5d03dfb..352af04 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.to.lds.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.to.lds.ll @@ -218,3 +218,174 @@ main_body: ret void } +define amdgpu_ps void @global_load_lds_dword_volatile(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) inreg %lptr) { +; GFX90A-LABEL: global_load_lds_dword_volatile: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 m0, s0 +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: global_load_dword v[0:1], off glc lds +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: global_load_dword v[0:1], off offset:256 lds +; GFX90A-NEXT: global_load_dword v[0:1], off offset:512 lds +; GFX90A-NEXT: s_endpgm +; +; GFX942-LABEL: global_load_lds_dword_volatile: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_mov_b32 m0, s0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: global_load_lds_dword v[0:1], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:256 +; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:512 +; GFX942-NEXT: s_endpgm +; +; GFX10-LABEL: global_load_lds_dword_volatile: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 m0, s0 +; GFX10-NEXT: global_load_dword v[0:1], off glc dlc lds +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_load_dword v[0:1], off offset:256 lds +; GFX10-NEXT: global_load_dword v[0:1], off offset:512 lds +; GFX10-NEXT: s_endpgm +; +; GFX942-GISEL-LABEL: global_load_lds_dword_volatile: +; GFX942-GISEL: ; %bb.0: ; %main_body +; GFX942-GISEL-NEXT: s_mov_b32 m0, s0 +; GFX942-GISEL-NEXT: s_nop 0 +; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:256 +; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:512 +; GFX942-GISEL-NEXT: s_endpgm +main_body: + call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648) + call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 4, i32 256, i32 0) + call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) + ret void +} + +define amdgpu_ps void @buffer_load_lds_dword_volatile(ptr addrspace(7) nocapture inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) { +; GFX90A-LABEL: buffer_load_lds_dword_volatile: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: v_add_u32_e32 v0, s4, v0 +; GFX90A-NEXT: s_mov_b32 m0, s5 +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc lds +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:256 lds +; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX90A-NEXT: s_endpgm +; +; GFX942-LABEL: buffer_load_lds_dword_volatile: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: v_add_u32_e32 v0, s4, v0 +; GFX942-NEXT: s_mov_b32 m0, s5 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:256 lds +; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX942-NEXT: s_endpgm +; +; GFX10-LABEL: buffer_load_lds_dword_volatile: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_add_nc_u32_e32 v0, s4, v0 +; GFX10-NEXT: s_mov_b32 m0, s5 +; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc dlc lds +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:256 lds +; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX10-NEXT: s_endpgm +; +; GFX942-GISEL-LABEL: buffer_load_lds_dword_volatile: +; GFX942-GISEL: ; %bb.0: ; %main_body +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s4, v0 +; GFX942-GISEL-NEXT: s_mov_b32 m0, s5 +; GFX942-GISEL-NEXT: s_nop 0 +; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:256 lds +; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX942-GISEL-NEXT: s_endpgm +main_body: + %gptr.off = getelementptr i8, ptr addrspace(7) %gptr, i32 %off + call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648) + call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 256, i32 0) + call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) + ret void +} + +define amdgpu_ps void @global_load_lds_dword_nontemporal(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) inreg %lptr) { +; GFX90A-LABEL: global_load_lds_dword_nontemporal: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 +; GFX90A-NEXT: s_mov_b32 m0, s2 +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: global_load_dword v0, s[0:1] glc slc lds +; GFX90A-NEXT: s_endpgm +; +; GFX942-LABEL: global_load_lds_dword_nontemporal: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: s_mov_b32 m0, s2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: global_load_lds_dword v0, s[0:1] nt +; GFX942-NEXT: s_endpgm +; +; GFX10-LABEL: global_load_lds_dword_nontemporal: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: s_mov_b32 m0, s2 +; GFX10-NEXT: global_load_dword v0, s[0:1] slc lds +; GFX10-NEXT: s_endpgm +; +; GFX942-GISEL-LABEL: global_load_lds_dword_nontemporal: +; GFX942-GISEL: ; %bb.0: ; %main_body +; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-GISEL-NEXT: s_mov_b32 m0, s2 +; GFX942-GISEL-NEXT: s_nop 0 +; GFX942-GISEL-NEXT: global_load_lds_dword v0, s[0:1] nt +; GFX942-GISEL-NEXT: s_endpgm +main_body: + call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 4, i32 0, i32 0), !nontemporal !0 + ret void +} + +define amdgpu_ps void @buffer_load_lds_dword_nontemporal(ptr addrspace(7) nocapture inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) { +; GFX90A-LABEL: buffer_load_lds_dword_nontemporal: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: v_add_u32_e32 v0, s4, v0 +; GFX90A-NEXT: s_mov_b32 m0, s5 +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc slc lds +; GFX90A-NEXT: s_endpgm +; +; GFX942-LABEL: buffer_load_lds_dword_nontemporal: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: v_add_u32_e32 v0, s4, v0 +; GFX942-NEXT: s_mov_b32 m0, s5 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen nt lds +; GFX942-NEXT: s_endpgm +; +; GFX10-LABEL: buffer_load_lds_dword_nontemporal: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_add_nc_u32_e32 v0, s4, v0 +; GFX10-NEXT: s_mov_b32 m0, s5 +; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen slc lds +; GFX10-NEXT: s_endpgm +; +; GFX942-GISEL-LABEL: buffer_load_lds_dword_nontemporal: +; GFX942-GISEL: ; %bb.0: ; %main_body +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s4, v0 +; GFX942-GISEL-NEXT: s_mov_b32 m0, s5 +; GFX942-GISEL-NEXT: s_nop 0 +; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen nt lds +; GFX942-GISEL-NEXT: s_endpgm +main_body: + %gptr.off = getelementptr i8, ptr addrspace(7) %gptr, i32 %off + call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 0), !nontemporal !0 + ret void +} + +!0 = !{i32 1} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.lds.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.lds.ll index f0204bd..1dcd032 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.lds.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.lds.ll @@ -110,3 +110,43 @@ main_body: call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 1, i32 0, i32 0, i32 2048, i32 0) ret void } + +define amdgpu_ps float @buffer_load_lds_dword_volatile(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) { +; GCN-LABEL: buffer_load_lds_dword_volatile: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b32 m0, s4 +; GCN-NEXT: s_nop 0 +; GCN-NEXT: buffer_load_dword off, s[0:3], 0 glc lds +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_load_dword off, s[0:3], 0 offset:256 lds +; GCN-NEXT: buffer_load_dword off, s[0:3], 0 offset:512 lds +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ds_read_b32 v0, v0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: ; return to shader part epilog +main_body: + call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 2147483648) + call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 256, i32 0) + call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 512, i32 0) + %res = load float, ptr addrspace(3) %lds + ret float %res +} + +define amdgpu_ps float @buffer_load_lds_dword_nontemporal(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) { +; GCN-LABEL: buffer_load_lds_dword_nontemporal: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b32 m0, s4 +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: buffer_load_dword off, s[0:3], 0 glc slc lds +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ds_read_b32 v0, v0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: ; return to shader part epilog +main_body: + call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0), !nontemporal !0 + %res = load float, ptr addrspace(3) %lds + ret float %res +} + +!0 = !{i32 1} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll index b5d741b..a979999 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll @@ -80,25 +80,29 @@ define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load_volatile(p ; PREGFX10-LABEL: buffer_load_volatile: ; PREGFX10: ; %bb.0: ; %main_body ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc +; PREGFX10-NEXT: s_waitcnt vmcnt(0) ; PREGFX10-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc +; PREGFX10-NEXT: s_waitcnt vmcnt(0) ; PREGFX10-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 glc slc ; PREGFX10-NEXT: s_waitcnt vmcnt(0) ; PREGFX10-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: buffer_load_volatile: ; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_clause 0x2 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc dlc +; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc dlc +; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 glc slc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: buffer_load_volatile: ; GFX11: ; %bb.0: ; %main_body -; GFX11-NEXT: s_clause 0x2 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[0:3], 0 glc slc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: ; return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll index 97db15b..1d1d3e4 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll @@ -107,6 +107,7 @@ define amdgpu_kernel void @buffer_last_use_and_volatile_load(ptr addrspace(7) %i ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX12-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] ; GFX12-NEXT: buffer_load_b32 v0, v0, s[8:11], null offen th:TH_LOAD_BYPASS scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_clause 0x1 ; GFX12-NEXT: s_load_b32 s13, s[4:5], 0x30 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x20 @@ -120,7 +121,6 @@ define amdgpu_kernel void @buffer_last_use_and_volatile_load(ptr addrspace(7) %i ; GFX12-NEXT: s_mov_b32 s3, s12 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX12-NEXT: s_or_b64 s[4:5], s[2:3], s[12:13] -; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: buffer_store_b32 v0, v1, s[4:7], null offen ; GFX12-NEXT: s_endpgm entry: diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-nontemporal-metadata.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-nontemporal-metadata.ll index 9dac239..1e4b633 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-nontemporal-metadata.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-nontemporal-metadata.ll @@ -354,6 +354,7 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX9-SDAG-NEXT: s_or_b64 s[4:5], s[2:3], s[10:11] ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-SDAG-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen glc +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX9-SDAG-NEXT: s_load_dword s11, s[8:9], 0x30 ; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x20 ; GFX9-SDAG-NEXT: s_mov_b32 s5, s10 @@ -365,8 +366,8 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX9-SDAG-NEXT: s_mov_b32 s3, s10 ; GFX9-SDAG-NEXT: s_or_b64 s[4:5], s[2:3], s[10:11] ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX9-SDAG-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX9-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: buffer_nontemporal_and_volatile_load_store: @@ -384,6 +385,7 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX9-GISEL-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7] ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-GISEL-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen glc +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x20 ; GFX9-GISEL-NEXT: s_load_dword s7, s[8:9], 0x30 ; GFX9-GISEL-NEXT: s_mov_b32 s4, s11 @@ -395,8 +397,8 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX9-GISEL-NEXT: s_mov_b32 s10, s3 ; GFX9-GISEL-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7] ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX9-GISEL-NEXT: s_endpgm ; ; GFX942-SDAG-LABEL: buffer_nontemporal_and_volatile_load_store: @@ -414,6 +416,7 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX942-SDAG-NEXT: s_or_b64 s[8:9], s[2:3], s[12:13] ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-SDAG-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX942-SDAG-NEXT: s_load_dword s13, s[4:5], 0x30 ; GFX942-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x20 ; GFX942-SDAG-NEXT: s_mov_b32 s5, s12 @@ -425,8 +428,8 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX942-SDAG-NEXT: s_mov_b32 s3, s12 ; GFX942-SDAG-NEXT: s_or_b64 s[4:5], s[2:3], s[12:13] ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, s0 -; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX942-SDAG-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX942-SDAG-NEXT: s_endpgm ; ; GFX942-GISEL-LABEL: buffer_nontemporal_and_volatile_load_store: @@ -444,6 +447,7 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX942-GISEL-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11] ; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-GISEL-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX942-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x20 ; GFX942-GISEL-NEXT: s_load_dword s9, s[4:5], 0x30 ; GFX942-GISEL-NEXT: s_mov_b32 s4, s7 @@ -455,8 +459,8 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX942-GISEL-NEXT: s_mov_b32 s6, s3 ; GFX942-GISEL-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] ; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, s0 -; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX942-GISEL-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX942-GISEL-NEXT: s_endpgm ; ; GFX10-SDAG-LABEL: buffer_nontemporal_and_volatile_load_store: @@ -475,6 +479,7 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX10-SDAG-NEXT: s_mov_b32 s11, s2 ; GFX10-SDAG-NEXT: s_or_b64 s[4:5], s[12:13], s[10:11] ; GFX10-SDAG-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen glc dlc +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX10-SDAG-NEXT: s_clause 0x1 ; GFX10-SDAG-NEXT: s_load_dword s11, s[8:9], 0x30 ; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x20 @@ -488,8 +493,8 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX10-SDAG-NEXT: s_mov_b32 s2, s1 ; GFX10-SDAG-NEXT: s_mov_b32 s3, s10 ; GFX10-SDAG-NEXT: s_or_b64 s[4:5], s[2:3], s[10:11] -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX10-SDAG-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen +; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: s_endpgm ; ; GFX10-GISEL-LABEL: buffer_nontemporal_and_volatile_load_store: @@ -508,6 +513,7 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX10-GISEL-NEXT: s_mov_b32 s6, s3 ; GFX10-GISEL-NEXT: s_or_b64 s[2:3], s[6:7], s[4:5] ; GFX10-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen glc dlc +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_clause 0x1 ; GFX10-GISEL-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x20 @@ -519,8 +525,8 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX10-GISEL-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] ; GFX10-GISEL-NEXT: s_mov_b32 s6, s3 ; GFX10-GISEL-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen +; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_endpgm ; ; GFX11-SDAG-LABEL: buffer_nontemporal_and_volatile_load_store: @@ -541,6 +547,7 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-SDAG-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] ; GFX11-SDAG-NEXT: buffer_load_b32 v0, v0, s[8:11], 0 offen glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX11-SDAG-NEXT: s_clause 0x1 ; GFX11-SDAG-NEXT: s_load_b32 s13, s[4:5], 0x30 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x20 @@ -554,8 +561,8 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX11-SDAG-NEXT: s_mov_b32 s3, s12 ; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-SDAG-NEXT: s_or_b64 s[4:5], s[2:3], s[12:13] -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX11-SDAG-NEXT: buffer_store_b32 v0, v1, s[4:7], 0 offen dlc +; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX11-GISEL-LABEL: buffer_nontemporal_and_volatile_load_store: @@ -576,6 +583,7 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7] ; GFX11-GISEL-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen glc dlc +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX11-GISEL-NEXT: s_clause 0x1 ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x20 ; GFX11-GISEL-NEXT: s_load_b32 s7, s[4:5], 0x30 @@ -588,8 +596,8 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX11-GISEL-NEXT: s_mov_b32 s8, s3 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX11-GISEL-NEXT: buffer_store_b32 v0, v1, s[4:7], 0 offen dlc +; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_endpgm ; ; GFX12-SDAG-LABEL: buffer_nontemporal_and_volatile_load_store: @@ -610,6 +618,7 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX12-SDAG-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] ; GFX12-SDAG-NEXT: buffer_load_b32 v0, v0, s[8:11], null offen th:TH_LOAD_NT scope:SCOPE_SYS +; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX12-SDAG-NEXT: s_clause 0x1 ; GFX12-SDAG-NEXT: s_load_b32 s13, s[4:5], 0x30 ; GFX12-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x20 @@ -623,8 +632,8 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX12-SDAG-NEXT: s_mov_b32 s3, s12 ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX12-SDAG-NEXT: s_or_b64 s[4:5], s[2:3], s[12:13] -; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX12-SDAG-NEXT: buffer_store_b32 v0, v1, s[4:7], null offen th:TH_STORE_NT scope:SCOPE_SYS +; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 ; GFX12-SDAG-NEXT: s_endpgm ; ; GFX12-GISEL-LABEL: buffer_nontemporal_and_volatile_load_store: @@ -645,6 +654,7 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX12-GISEL-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7] ; GFX12-GISEL-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen th:TH_LOAD_NT scope:SCOPE_SYS +; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: s_clause 0x1 ; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x20 ; GFX12-GISEL-NEXT: s_load_b32 s7, s[4:5], 0x30 @@ -657,8 +667,8 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX12-GISEL-NEXT: s_mov_b32 s8, s3 ; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX12-GISEL-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] -; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: buffer_store_b32 v0, v1, s[4:7], null offen th:TH_STORE_NT scope:SCOPE_SYS +; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 ; GFX12-GISEL-NEXT: s_endpgm entry: %val = load volatile i32, ptr addrspace(7) %in, !nontemporal !0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-lds-dma-volatile-and-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-lds-dma-volatile-and-nontemporal.ll new file mode 100644 index 0000000..0b40c81 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-lds-dma-volatile-and-nontemporal.ll @@ -0,0 +1,542 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=amdgcn-- -mcpu=gfx90a -global-isel=0 < %s | FileCheck --check-prefixes=GFX90A %s +; RUN: llc -mtriple=amdgcn-- -mcpu=gfx942 -global-isel=0 < %s | FileCheck --check-prefixes=GFX942 %s +; RUN: llc -mtriple=amdgcn-- -mcpu=gfx942 -global-isel=1 < %s | FileCheck --check-prefixes=GFX942-GISEL %s +; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -global-isel=0 < %s | FileCheck --check-prefixes=GFX10 %s + +define amdgpu_ps void @global_load_lds_dword_volatile(ptr addrspace(1) inreg %gptr, i64 %off, ptr addrspace(3) inreg %lptr) { +; GFX90A-LABEL: global_load_lds_dword_volatile: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: v_mov_b32_e32 v2, s1 +; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 +; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX90A-NEXT: s_mov_b32 m0, s2 +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: global_load_dword v[0:1], off glc lds +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: global_load_dword v[0:1], off offset:512 lds +; GFX90A-NEXT: s_endpgm +; +; GFX942-LABEL: global_load_lds_dword_volatile: +; GFX942: ; %bb.0: +; GFX942-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] +; GFX942-NEXT: s_mov_b32 m0, s2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: global_load_lds_dword v[0:1], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:512 +; GFX942-NEXT: s_endpgm +; +; GFX942-GISEL-LABEL: global_load_lds_dword_volatile: +; GFX942-GISEL: ; %bb.0: +; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 +; GFX942-GISEL-NEXT: s_mov_b32 m0, s2 +; GFX942-GISEL-NEXT: s_nop 0 +; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc +; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:512 +; GFX942-GISEL-NEXT: s_endpgm +; +; GFX10-LABEL: global_load_lds_dword_volatile: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo +; GFX10-NEXT: s_mov_b32 m0, s2 +; GFX10-NEXT: global_load_dword v[0:1], off glc dlc lds +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_load_dword v[0:1], off offset:512 lds +; GFX10-NEXT: s_endpgm + %gptr.off = getelementptr i8, ptr addrspace(1) %gptr, i64 %off + call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648) + call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) + ret void +} + +define amdgpu_ps void @global_load_lds_dword_nontemporal(ptr addrspace(1) inreg %gptr, i64 %off, ptr addrspace(3) inreg %lptr) { +; GFX90A-LABEL: global_load_lds_dword_nontemporal: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: v_mov_b32_e32 v2, s1 +; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 +; GFX90A-NEXT: s_mov_b32 m0, s2 +; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX90A-NEXT: global_load_dword v[0:1], off glc slc lds +; GFX90A-NEXT: global_load_dword v[0:1], off offset:512 lds +; GFX90A-NEXT: s_endpgm +; +; GFX942-LABEL: global_load_lds_dword_nontemporal: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_mov_b32 m0, s2 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] +; GFX942-NEXT: global_load_lds_dword v[0:1], off nt +; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:512 +; GFX942-NEXT: s_endpgm +; +; GFX942-GISEL-LABEL: global_load_lds_dword_nontemporal: +; GFX942-GISEL: ; %bb.0: +; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 +; GFX942-GISEL-NEXT: s_mov_b32 m0, s2 +; GFX942-GISEL-NEXT: s_nop 0 +; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc +; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off nt +; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:512 +; GFX942-GISEL-NEXT: s_endpgm +; +; GFX10-LABEL: global_load_lds_dword_nontemporal: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo +; GFX10-NEXT: s_mov_b32 m0, s2 +; GFX10-NEXT: global_load_dword v[0:1], off slc lds +; GFX10-NEXT: global_load_dword v[0:1], off offset:512 lds +; GFX10-NEXT: s_endpgm + %gptr.off = getelementptr i8, ptr addrspace(1) %gptr, i64 %off + call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 0), !nontemporal !0 + call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) + ret void +} + +define amdgpu_ps void @global_load_lds_dword_volatile_nontemporal(ptr addrspace(1) inreg %gptr, i64 %off, ptr addrspace(3) inreg %lptr) { +; GFX90A-LABEL: global_load_lds_dword_volatile_nontemporal: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: v_mov_b32_e32 v2, s1 +; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 +; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX90A-NEXT: s_mov_b32 m0, s2 +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: global_load_dword v[0:1], off glc lds +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: global_load_dword v[0:1], off offset:512 lds +; GFX90A-NEXT: s_endpgm +; +; GFX942-LABEL: global_load_lds_dword_volatile_nontemporal: +; GFX942: ; %bb.0: +; GFX942-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] +; GFX942-NEXT: s_mov_b32 m0, s2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: global_load_lds_dword v[0:1], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:512 +; GFX942-NEXT: s_endpgm +; +; GFX942-GISEL-LABEL: global_load_lds_dword_volatile_nontemporal: +; GFX942-GISEL: ; %bb.0: +; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 +; GFX942-GISEL-NEXT: s_mov_b32 m0, s2 +; GFX942-GISEL-NEXT: s_nop 0 +; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc +; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:512 +; GFX942-GISEL-NEXT: s_endpgm +; +; GFX10-LABEL: global_load_lds_dword_volatile_nontemporal: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo +; GFX10-NEXT: s_mov_b32 m0, s2 +; GFX10-NEXT: global_load_dword v[0:1], off glc dlc lds +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_load_dword v[0:1], off offset:512 lds +; GFX10-NEXT: s_endpgm + %gptr.off = getelementptr i8, ptr addrspace(1) %gptr, i64 %off + call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648), !nontemporal !0 + call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) + ret void +} + +define amdgpu_ps void @load_to_lds_p1_dword_volatile(ptr addrspace(1) inreg %gptr, i64 %off, ptr addrspace(3) inreg %lptr) { +; GFX90A-LABEL: load_to_lds_p1_dword_volatile: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: v_mov_b32_e32 v2, s1 +; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 +; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX90A-NEXT: s_mov_b32 m0, s2 +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: global_load_dword v[0:1], off glc lds +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: global_load_dword v[0:1], off offset:512 lds +; GFX90A-NEXT: s_endpgm +; +; GFX942-LABEL: load_to_lds_p1_dword_volatile: +; GFX942: ; %bb.0: +; GFX942-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] +; GFX942-NEXT: s_mov_b32 m0, s2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: global_load_lds_dword v[0:1], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:512 +; GFX942-NEXT: s_endpgm +; +; GFX942-GISEL-LABEL: load_to_lds_p1_dword_volatile: +; GFX942-GISEL: ; %bb.0: +; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 +; GFX942-GISEL-NEXT: s_mov_b32 m0, s2 +; GFX942-GISEL-NEXT: s_nop 0 +; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc +; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:512 +; GFX942-GISEL-NEXT: s_endpgm +; +; GFX10-LABEL: load_to_lds_p1_dword_volatile: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo +; GFX10-NEXT: s_mov_b32 m0, s2 +; GFX10-NEXT: global_load_dword v[0:1], off glc dlc lds +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_load_dword v[0:1], off offset:512 lds +; GFX10-NEXT: s_endpgm + %gptr.off = getelementptr i8, ptr addrspace(1) %gptr, i64 %off + call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648) + call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) + ret void +} + +define amdgpu_ps void @load_to_lds_p1_dword_nontemporal(ptr addrspace(1) inreg %gptr, i64 %off, ptr addrspace(3) inreg %lptr) { +; GFX90A-LABEL: load_to_lds_p1_dword_nontemporal: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: v_mov_b32_e32 v2, s1 +; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 +; GFX90A-NEXT: s_mov_b32 m0, s2 +; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX90A-NEXT: global_load_dword v[0:1], off glc slc lds +; GFX90A-NEXT: global_load_dword v[0:1], off offset:512 lds +; GFX90A-NEXT: s_endpgm +; +; GFX942-LABEL: load_to_lds_p1_dword_nontemporal: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_mov_b32 m0, s2 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] +; GFX942-NEXT: global_load_lds_dword v[0:1], off nt +; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:512 +; GFX942-NEXT: s_endpgm +; +; GFX942-GISEL-LABEL: load_to_lds_p1_dword_nontemporal: +; GFX942-GISEL: ; %bb.0: +; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 +; GFX942-GISEL-NEXT: s_mov_b32 m0, s2 +; GFX942-GISEL-NEXT: s_nop 0 +; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc +; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off nt +; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:512 +; GFX942-GISEL-NEXT: s_endpgm +; +; GFX10-LABEL: load_to_lds_p1_dword_nontemporal: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo +; GFX10-NEXT: s_mov_b32 m0, s2 +; GFX10-NEXT: global_load_dword v[0:1], off slc lds +; GFX10-NEXT: global_load_dword v[0:1], off offset:512 lds +; GFX10-NEXT: s_endpgm + %gptr.off = getelementptr i8, ptr addrspace(1) %gptr, i64 %off + call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 0), !nontemporal !0 + call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) + ret void +} + +define amdgpu_ps void @load_to_lds_p1_dword_volatile_nontemporal(ptr addrspace(1) inreg %gptr, i64 %off, ptr addrspace(3) inreg %lptr) { +; GFX90A-LABEL: load_to_lds_p1_dword_volatile_nontemporal: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: v_mov_b32_e32 v2, s1 +; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 +; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX90A-NEXT: s_mov_b32 m0, s2 +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: global_load_dword v[0:1], off glc lds +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: global_load_dword v[0:1], off offset:512 lds +; GFX90A-NEXT: s_endpgm +; +; GFX942-LABEL: load_to_lds_p1_dword_volatile_nontemporal: +; GFX942: ; %bb.0: +; GFX942-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] +; GFX942-NEXT: s_mov_b32 m0, s2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: global_load_lds_dword v[0:1], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:512 +; GFX942-NEXT: s_endpgm +; +; GFX942-GISEL-LABEL: load_to_lds_p1_dword_volatile_nontemporal: +; GFX942-GISEL: ; %bb.0: +; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 +; GFX942-GISEL-NEXT: s_mov_b32 m0, s2 +; GFX942-GISEL-NEXT: s_nop 0 +; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc +; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:512 +; GFX942-GISEL-NEXT: s_endpgm +; +; GFX10-LABEL: load_to_lds_p1_dword_volatile_nontemporal: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo +; GFX10-NEXT: s_mov_b32 m0, s2 +; GFX10-NEXT: global_load_dword v[0:1], off glc dlc lds +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_load_dword v[0:1], off offset:512 lds +; GFX10-NEXT: s_endpgm + %gptr.off = getelementptr i8, ptr addrspace(1) %gptr, i64 %off + call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648), !nontemporal !0 + call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) + ret void +} + +define amdgpu_ps void @load_to_lds_p7_dword_volatile(ptr addrspace(7) inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) { +; GFX90A-LABEL: load_to_lds_p7_dword_volatile: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: v_add_u32_e32 v0, s4, v0 +; GFX90A-NEXT: s_mov_b32 m0, s5 +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc lds +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX90A-NEXT: s_endpgm +; +; GFX942-LABEL: load_to_lds_p7_dword_volatile: +; GFX942: ; %bb.0: +; GFX942-NEXT: v_add_u32_e32 v0, s4, v0 +; GFX942-NEXT: s_mov_b32 m0, s5 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX942-NEXT: s_endpgm +; +; GFX942-GISEL-LABEL: load_to_lds_p7_dword_volatile: +; GFX942-GISEL: ; %bb.0: +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s4, v0 +; GFX942-GISEL-NEXT: s_mov_b32 m0, s5 +; GFX942-GISEL-NEXT: s_nop 0 +; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX942-GISEL-NEXT: s_endpgm +; +; GFX10-LABEL: load_to_lds_p7_dword_volatile: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_add_nc_u32_e32 v0, s4, v0 +; GFX10-NEXT: s_mov_b32 m0, s5 +; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc dlc lds +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX10-NEXT: s_endpgm + %gptr.off = getelementptr i8, ptr addrspace(7) %gptr, i32 %off + call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648) + call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) + ret void +} + +define amdgpu_ps void @load_to_lds_p7_dword_nontemporal(ptr addrspace(7) inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) { +; GFX90A-LABEL: load_to_lds_p7_dword_nontemporal: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: v_add_u32_e32 v0, s4, v0 +; GFX90A-NEXT: s_mov_b32 m0, s5 +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc slc lds +; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX90A-NEXT: s_endpgm +; +; GFX942-LABEL: load_to_lds_p7_dword_nontemporal: +; GFX942: ; %bb.0: +; GFX942-NEXT: v_add_u32_e32 v0, s4, v0 +; GFX942-NEXT: s_mov_b32 m0, s5 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen nt lds +; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX942-NEXT: s_endpgm +; +; GFX942-GISEL-LABEL: load_to_lds_p7_dword_nontemporal: +; GFX942-GISEL: ; %bb.0: +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s4, v0 +; GFX942-GISEL-NEXT: s_mov_b32 m0, s5 +; GFX942-GISEL-NEXT: s_nop 0 +; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen nt lds +; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX942-GISEL-NEXT: s_endpgm +; +; GFX10-LABEL: load_to_lds_p7_dword_nontemporal: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_add_nc_u32_e32 v0, s4, v0 +; GFX10-NEXT: s_mov_b32 m0, s5 +; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen slc lds +; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX10-NEXT: s_endpgm + %gptr.off = getelementptr i8, ptr addrspace(7) %gptr, i32 %off + call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 0), !nontemporal !0 + call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) + ret void +} + +define amdgpu_ps void @load_to_lds_p7_dword_volatile_nontemporal(ptr addrspace(7) inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) { +; GFX90A-LABEL: load_to_lds_p7_dword_volatile_nontemporal: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: v_add_u32_e32 v0, s4, v0 +; GFX90A-NEXT: s_mov_b32 m0, s5 +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc lds +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX90A-NEXT: s_endpgm +; +; GFX942-LABEL: load_to_lds_p7_dword_volatile_nontemporal: +; GFX942: ; %bb.0: +; GFX942-NEXT: v_add_u32_e32 v0, s4, v0 +; GFX942-NEXT: s_mov_b32 m0, s5 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX942-NEXT: s_endpgm +; +; GFX942-GISEL-LABEL: load_to_lds_p7_dword_volatile_nontemporal: +; GFX942-GISEL: ; %bb.0: +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s4, v0 +; GFX942-GISEL-NEXT: s_mov_b32 m0, s5 +; GFX942-GISEL-NEXT: s_nop 0 +; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX942-GISEL-NEXT: s_endpgm +; +; GFX10-LABEL: load_to_lds_p7_dword_volatile_nontemporal: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_add_nc_u32_e32 v0, s4, v0 +; GFX10-NEXT: s_mov_b32 m0, s5 +; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc dlc lds +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX10-NEXT: s_endpgm + %gptr.off = getelementptr i8, ptr addrspace(7) %gptr, i32 %off + call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648), !nontemporal !0 + call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) + ret void +} + +define amdgpu_ps void @raw_ptr_buffer_load_lds_dword_volatile(ptr addrspace(8) inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) { +; GFX90A-LABEL: raw_ptr_buffer_load_lds_dword_volatile: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_mov_b32 m0, s4 +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc lds +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX90A-NEXT: s_endpgm +; +; GFX942-LABEL: raw_ptr_buffer_load_lds_dword_volatile: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_mov_b32 m0, s4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX942-NEXT: s_endpgm +; +; GFX942-GISEL-LABEL: raw_ptr_buffer_load_lds_dword_volatile: +; GFX942-GISEL: ; %bb.0: +; GFX942-GISEL-NEXT: s_mov_b32 m0, s4 +; GFX942-GISEL-NEXT: s_nop 0 +; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX942-GISEL-NEXT: s_endpgm +; +; GFX10-LABEL: raw_ptr_buffer_load_lds_dword_volatile: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_mov_b32 m0, s4 +; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc dlc lds +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX10-NEXT: s_endpgm + call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %gptr, ptr addrspace(3) %lptr, i32 4, i32 %off, i32 0, i32 0, i32 2147483648) + call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %gptr, ptr addrspace(3) %lptr, i32 4, i32 %off, i32 0, i32 512, i32 0) + ret void +} + +define amdgpu_ps void @raw_ptr_buffer_load_lds_dword_nontemporal(ptr addrspace(8) inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) { +; GFX90A-LABEL: raw_ptr_buffer_load_lds_dword_nontemporal: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_mov_b32 m0, s4 +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc slc lds +; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX90A-NEXT: s_endpgm +; +; GFX942-LABEL: raw_ptr_buffer_load_lds_dword_nontemporal: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_mov_b32 m0, s4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen nt lds +; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX942-NEXT: s_endpgm +; +; GFX942-GISEL-LABEL: raw_ptr_buffer_load_lds_dword_nontemporal: +; GFX942-GISEL: ; %bb.0: +; GFX942-GISEL-NEXT: s_mov_b32 m0, s4 +; GFX942-GISEL-NEXT: s_nop 0 +; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen nt lds +; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX942-GISEL-NEXT: s_endpgm +; +; GFX10-LABEL: raw_ptr_buffer_load_lds_dword_nontemporal: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_mov_b32 m0, s4 +; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen slc lds +; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX10-NEXT: s_endpgm + call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %gptr, ptr addrspace(3) %lptr, i32 4, i32 %off, i32 0, i32 0, i32 0), !nontemporal !0 + call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %gptr, ptr addrspace(3) %lptr, i32 4, i32 %off, i32 0, i32 512, i32 0) + ret void +} + +define amdgpu_ps void @raw_ptr_buffer_load_lds_dword_volatile_nontemporal(ptr addrspace(8) inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) { +; GFX90A-LABEL: raw_ptr_buffer_load_lds_dword_volatile_nontemporal: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_mov_b32 m0, s4 +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc lds +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX90A-NEXT: s_endpgm +; +; GFX942-LABEL: raw_ptr_buffer_load_lds_dword_volatile_nontemporal: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_mov_b32 m0, s4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX942-NEXT: s_endpgm +; +; GFX942-GISEL-LABEL: raw_ptr_buffer_load_lds_dword_volatile_nontemporal: +; GFX942-GISEL: ; %bb.0: +; GFX942-GISEL-NEXT: s_mov_b32 m0, s4 +; GFX942-GISEL-NEXT: s_nop 0 +; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX942-GISEL-NEXT: s_endpgm +; +; GFX10-LABEL: raw_ptr_buffer_load_lds_dword_volatile_nontemporal: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_mov_b32 m0, s4 +; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc dlc lds +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds +; GFX10-NEXT: s_endpgm + call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %gptr, ptr addrspace(3) %lptr, i32 4, i32 %off, i32 0, i32 0, i32 2147483648), !nontemporal !0 + call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %gptr, ptr addrspace(3) %lptr, i32 4, i32 %off, i32 0, i32 512, i32 0) + ret void +} + +!0 = !{i32 1} diff --git a/llvm/test/CodeGen/BPF/BTF/variant-part.ll b/llvm/test/CodeGen/BPF/BTF/variant-part.ll new file mode 100644 index 0000000..1071e61 --- /dev/null +++ b/llvm/test/CodeGen/BPF/BTF/variant-part.ll @@ -0,0 +1,87 @@ +; RUN: llc -mtriple=bpfel -filetype=obj -o %t1 %s +; RUN: llvm-objcopy --dump-section='.BTF'=%t2 %t1 +; RUN: %python %p/print_btf.py %t2 | FileCheck -check-prefixes=CHECK-BTF %s +; RUN: llc -mtriple=bpfeb -filetype=obj -o %t1 %s +; RUN: llvm-objcopy --dump-section='.BTF'=%t2 %t1 +; RUN: %python %p/print_btf.py %t2 | FileCheck -check-prefixes=CHECK-BTF %s +; +; Source: +; #![no_std] +; #![no_main] +; +; pub enum MyEnum { +; First { a: u32, b: i32 }, +; Second(u32), +; } +; +; #[unsafe(no_mangle)] +; pub static X: MyEnum = MyEnum::First { a: 54, b: -23 }; +; +; #[cfg(not(test))] +; #[panic_handler] +; fn panic(_info: &core::panic::PanicInfo) -> ! { +; loop {} +; } +; Compilation flag: +; cargo +nightly rustc -Zbuild-std=core --target=bpfel-unknown-none -- --emit=llvm-bc +; llvm-extract --glob=X $(find target/ -name "*.bc" | head -n 1) -o variant-part.bc +; llvm-dis variant-part.bc -o variant-part.ll + +; ModuleID = 'variant-part.bc' +source_filename = "c0znihgkvro8hs0n88fgrtg6x" +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "bpfel" + +@X = constant [12 x i8] c"\00\00\00\006\00\00\00\E9\FF\FF\FF", align 4, !dbg !0 + +!llvm.module.flags = !{!22, !23, !24, !25} +!llvm.ident = !{!26} +!llvm.dbg.cu = !{!27} + +; CHECK-BTF: [1] STRUCT 'MyEnum' size=12 vlen=1 +; CHECK-BTF-NEXT: '(anon)' type_id=3 bits_offset=0 +; CHECK-BTF-NEXT: [2] INT 'u32' size=4 bits_offset=0 nr_bits=32 encoding=(none) +; CHECK-BTF-NEXT: [3] UNION '(anon)' size=12 vlen=3 +; CHECK-BTF-NEXT: '(anon)' type_id=2 bits_offset=0 +; CHECK-BTF-NEXT: 'First' type_id=4 bits_offset=0 +; CHECK-BTF-NEXT: 'Second' type_id=6 bits_offset=0 +; CHECK-BTF-NEXT: [4] STRUCT 'First' size=12 vlen=2 +; CHECK-BTF-NEXT: 'a' type_id=2 bits_offset=32 +; CHECK-BTF-NEXT: 'b' type_id=5 bits_offset=64 +; CHECK-BTF-NEXT: [5] INT 'i32' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED +; CHECK-BTF-NEXT: [6] STRUCT 'Second' size=12 vlen=1 +; CHECK-BTF-NEXT: '__0' type_id=2 bits_offset=32 +; CHECK-BTF-NEXT: [7] VAR 'X' type_id=1, linkage=global +; CHECK-BTF-NEXT: [8] DATASEC '.rodata' size=0 vlen=1 +; CHECK-BTF-NEXT: type_id=7 offset=0 size=12 + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "X", scope: !2, file: !3, line: 10, type: !4, isLocal: false, isDefinition: true, align: 32) +!2 = !DINamespace(name: "variant_part", scope: null) +!3 = !DIFile(filename: "variant-part/src/main.rs", directory: "/tmp/variant-part", checksumkind: CSK_MD5, checksum: "b94cd53886ea8f14cbc116b36bc7dd36") +!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "MyEnum", scope: !2, file: !5, size: 96, align: 32, flags: DIFlagPublic, elements: !6, templateParams: !16, identifier: "faba668fd9f71e9b7cf3b9ac5e8b93cb") +!5 = !DIFile(filename: "<unknown>", directory: "") +!6 = !{!7} +!7 = !DICompositeType(tag: DW_TAG_variant_part, scope: !4, file: !5, size: 96, align: 32, elements: !8, templateParams: !16, identifier: "e4aee046fc86d111657622fdcb8c42f7", discriminator: !21) +!8 = !{!9, !17} +!9 = !DIDerivedType(tag: DW_TAG_member, name: "First", scope: !7, file: !5, baseType: !10, size: 96, align: 32, extraData: i32 0) +!10 = !DICompositeType(tag: DW_TAG_structure_type, name: "First", scope: !4, file: !5, size: 96, align: 32, flags: DIFlagPublic, elements: !11, templateParams: !16, identifier: "cc7748c842e275452db4205b190c8ff7") +!11 = !{!12, !14} +!12 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !10, file: !5, baseType: !13, size: 32, align: 32, offset: 32, flags: DIFlagPublic) +!13 = !DIBasicType(name: "u32", size: 32, encoding: DW_ATE_unsigned) +!14 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !10, file: !5, baseType: !15, size: 32, align: 32, offset: 64, flags: DIFlagPublic) +!15 = !DIBasicType(name: "i32", size: 32, encoding: DW_ATE_signed) +!16 = !{} +!17 = !DIDerivedType(tag: DW_TAG_member, name: "Second", scope: !7, file: !5, baseType: !18, size: 96, align: 32, extraData: i32 1) +!18 = !DICompositeType(tag: DW_TAG_structure_type, name: "Second", scope: !4, file: !5, size: 96, align: 32, flags: DIFlagPublic, elements: !19, templateParams: !16, identifier: "a2094b1381f3082d504fbd0903aa7c06") +!19 = !{!20} +!20 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !18, file: !5, baseType: !13, size: 32, align: 32, offset: 32, flags: DIFlagPublic) +!21 = !DIDerivedType(tag: DW_TAG_member, scope: !4, file: !5, baseType: !13, size: 32, align: 32, flags: DIFlagArtificial) +!22 = !{i32 8, !"PIC Level", i32 2} +!23 = !{i32 7, !"PIE Level", i32 2} +!24 = !{i32 7, !"Dwarf Version", i32 4} +!25 = !{i32 2, !"Debug Info Version", i32 3} +!26 = !{!"rustc version 1.91.0-nightly (160e7623e 2025-08-26)"} +!27 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !28, producer: "clang LLVM (rustc version 1.91.0-nightly (160e7623e 2025-08-26))", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !29, splitDebugInlining: false, nameTableKind: None) +!28 = !DIFile(filename: "variant-part/src/main.rs/@/c0znihgkvro8hs0n88fgrtg6x", directory: "/tmp/variant-part") +!29 = !{!0} diff --git a/llvm/test/Transforms/GVNSink/ptrtoaddr.ll b/llvm/test/Transforms/GVNSink/ptrtoaddr.ll new file mode 100644 index 0000000..3a9d16e --- /dev/null +++ b/llvm/test/Transforms/GVNSink/ptrtoaddr.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes=gvn-sink < %s | FileCheck %s + +define i64 @test(i1 %c, ptr %p, ptr %p2) { +; CHECK-LABEL: define i64 @test( +; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: br i1 [[C]], label %[[IF:.*]], label %[[ELSE:.*]] +; CHECK: [[IF]]: +; CHECK-NEXT: br label %[[JOIN:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: br label %[[JOIN]] +; CHECK: [[JOIN]]: +; CHECK-NEXT: [[P2_SINK:%.*]] = phi ptr [ [[P2]], %[[ELSE]] ], [ [[P]], %[[IF]] ] +; CHECK-NEXT: [[PHI:%.*]] = ptrtoaddr ptr [[P2_SINK]] to i64 +; CHECK-NEXT: ret i64 [[PHI]] +; + br i1 %c, label %if, label %else + +if: + %p.addr = ptrtoaddr ptr %p to i64 + br label %join + +else: + %p2.addr = ptrtoaddr ptr %p2 to i64 + br label %join + +join: + %phi = phi i64 [ %p.addr, %if ], [ %p2.addr, %else ] + ret i64 %phi +} diff --git a/llvm/test/Transforms/Inline/ML/recursive.ll b/llvm/test/Transforms/Inline/ML/recursive.ll new file mode 100644 index 0000000..2d9240a --- /dev/null +++ b/llvm/test/Transforms/Inline/ML/recursive.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6 +; REQUIRES: llvm_inliner_model_autogenerated +; RUN: opt -S %s -o - -passes='inliner-ml-advisor-release' -ml-inliner-skip-policy=if-caller-not-cold | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "aarch64-unknown-linux-android29" + +define i32 @a_func(ptr %this, i32 %color_id, i1 %dark_mode) local_unnamed_addr { +; CHECK-LABEL: define i32 @a_func( +; CHECK-SAME: ptr [[THIS:%.*]], i32 [[COLOR_ID:%.*]], i1 [[DARK_MODE:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[DARK_MODE]], label %[[SW_BB97:.*]], label %[[COMMON_RET:.*]] +; CHECK: [[COMMON_RET]]: +; CHECK-NEXT: ret i32 0 +; CHECK: [[SW_BB97]]: +; CHECK-NEXT: br label %[[COMMON_RET]] +; +entry: + br i1 %dark_mode, label %sw.bb97, label %common.ret + +common.ret: ; preds = %sw.bb97, %entry + ret i32 0 + +sw.bb97: ; preds = %entry + %call.i = tail call i32 @a_func(ptr null, i32 0, i1 false) + br label %common.ret +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: read) +declare ptr @llvm.load.relative.i32(ptr %0, i32 %1) #0 + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +;. diff --git a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll b/llvm/test/Transforms/InstSimplify/ptrtoaddr.ll index ffaa8b1..03a2d4b 100644 --- a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll +++ b/llvm/test/Transforms/InstSimplify/ptrtoaddr.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 -; RUN: opt < %s -passes=instcombine -S | FileCheck %s +; RUN: opt < %s -passes=instsimplify -S | FileCheck %s ; The ptrtoaddr folds are also valid for pointers that have external state. target datalayout = "pe1:64:64:64:32" @@ -34,77 +34,95 @@ define i32 @ptrtoaddr_inttoptr() { ; CHECK-LABEL: define i32 @ptrtoaddr_inttoptr() { ; CHECK-NEXT: ret i32 -1 ; - ret i32 ptrtoaddr (ptr addrspace(1) inttoptr (i32 -1 to ptr addrspace(1)) to i32) + %toptr = inttoptr i32 -1 to ptr addrspace(1) + %toaddr = ptrtoaddr ptr addrspace(1) %toptr to i32 + ret i32 %toaddr } define i32 @ptrtoaddr_inttoptr_diff_size1() { ; CHECK-LABEL: define i32 @ptrtoaddr_inttoptr_diff_size1() { ; CHECK-NEXT: ret i32 -1 ; - ret i32 ptrtoaddr (ptr addrspace(1) inttoptr (i64 -1 to ptr addrspace(1)) to i32) + %toptr = inttoptr i64 -1 to ptr addrspace(1) + %toaddr = ptrtoaddr ptr addrspace(1) %toptr to i32 + ret i32 %toaddr } define i32 @ptrtoaddr_inttoptr_diff_size2() { ; CHECK-LABEL: define i32 @ptrtoaddr_inttoptr_diff_size2() { ; CHECK-NEXT: ret i32 65535 ; - ret i32 ptrtoaddr (ptr addrspace(1) inttoptr (i16 -1 to ptr addrspace(1)) to i32) + %toptr = inttoptr i16 -1 to ptr addrspace(1) + %toaddr = ptrtoaddr ptr addrspace(1) %toptr to i32 + ret i32 %toaddr } define i64 @ptrtoaddr_inttoptr_noas1() { ; CHECK-LABEL: define i64 @ptrtoaddr_inttoptr_noas1() { ; CHECK-NEXT: ret i64 1 ; - ret i64 ptrtoaddr (ptr getelementptr (i8, ptr null, i64 1) to i64) + %toptr = getelementptr i8, ptr null, i64 1 + %toaddr = ptrtoaddr ptr %toptr to i64 + ret i64 %toaddr } define i64 @ptr2addr2_inttoptr_noas2() { ; CHECK-LABEL: define i64 @ptr2addr2_inttoptr_noas2() { ; CHECK-NEXT: ret i64 123 ; - ret i64 ptrtoaddr (ptr inttoptr (i64 123 to ptr) to i64) + %toptr = inttoptr i64 123 to ptr + %toaddr = ptrtoaddr ptr %toptr to i64 + ret i64 %toaddr } define i64 @ptrtoaddr_inttoptr_noas_diff_size1() { ; CHECK-LABEL: define i64 @ptrtoaddr_inttoptr_noas_diff_size1() { ; CHECK-NEXT: ret i64 4294967295 ; - ret i64 ptrtoaddr (ptr inttoptr (i32 -1 to ptr) to i64) + %toptr = inttoptr i32 -1 to ptr + %toaddr = ptrtoaddr ptr %toptr to i64 + ret i64 %toaddr } define i64 @ptrtoaddr_inttoptr_noas_diff_size2() { ; CHECK-LABEL: define i64 @ptrtoaddr_inttoptr_noas_diff_size2() { ; CHECK-NEXT: ret i64 -1 ; - ret i64 ptrtoaddr (ptr inttoptr (i128 -1 to ptr) to i64) + %toptr = inttoptr i128 -1 to ptr + %toaddr = ptrtoaddr ptr %toptr to i64 + ret i64 %toaddr } define i64 @ptrtoaddr_gep_null() { ; CHECK-LABEL: define i64 @ptrtoaddr_gep_null() { ; CHECK-NEXT: ret i64 42 ; - ret i64 ptrtoaddr (ptr getelementptr (i8, ptr null, i64 42) to i64) + %toaddr = ptrtoaddr ptr getelementptr (i8, ptr null, i64 42) to i64 + ret i64 %toaddr } define i32 @ptrtoaddr_gep_null_addrsize() { ; CHECK-LABEL: define i32 @ptrtoaddr_gep_null_addrsize() { ; CHECK-NEXT: ret i32 42 ; - ret i32 ptrtoaddr (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) null, i32 42) to i32) + %toaddr = ptrtoaddr ptr addrspace(1) getelementptr (i8, ptr addrspace(1) null, i32 42) to i32 + ret i32 %toaddr } define i64 @ptrtoaddr_gep_sub() { ; CHECK-LABEL: define i64 @ptrtoaddr_gep_sub() { ; CHECK-NEXT: ret i64 sub (i64 ptrtoaddr (ptr @g to i64), i64 ptrtoaddr (ptr @g2 to i64)) ; - ret i64 ptrtoaddr (ptr getelementptr (i8, ptr @g, i64 sub (i64 0, i64 ptrtoaddr (ptr @g2 to i64))) to i64) + %toaddr = ptrtoaddr ptr getelementptr (i8, ptr @g, i64 sub (i64 0, i64 ptrtoaddr (ptr @g2 to i64))) to i64 + ret i64 %toaddr } define i32 @ptrtoaddr_gep_sub_addrsize() { ; CHECK-LABEL: define i32 @ptrtoaddr_gep_sub_addrsize() { ; CHECK-NEXT: ret i32 sub (i32 ptrtoaddr (ptr addrspace(1) @g.as1 to i32), i32 ptrtoaddr (ptr addrspace(1) @g2.as1 to i32)) ; - ret i32 ptrtoaddr (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) @g.as1, i32 sub (i32 0, i32 ptrtoaddr (ptr addrspace(1) @g2.as1 to i32))) to i32) + %toaddr = ptrtoaddr ptr addrspace(1) getelementptr (i8, ptr addrspace(1) @g.as1, i32 sub (i32 0, i32 ptrtoaddr (ptr addrspace(1) @g2.as1 to i32))) to i32 + ret i32 %toaddr } ; Don't fold inttoptr of ptrtoaddr away. inttoptr will pick a previously @@ -114,28 +132,32 @@ define ptr @inttoptr_of_ptrtoaddr() { ; CHECK-LABEL: define ptr @inttoptr_of_ptrtoaddr() { ; CHECK-NEXT: ret ptr inttoptr (i64 ptrtoaddr (ptr @g to i64) to ptr) ; - ret ptr inttoptr (i64 ptrtoaddr (ptr @g to i64) to ptr) + %toptr = inttoptr i64 ptrtoaddr (ptr @g to i64) to ptr + ret ptr %toptr } define i64 @ptrtoaddr_sub_consts_unrelated() { ; CHECK-LABEL: define i64 @ptrtoaddr_sub_consts_unrelated() { ; CHECK-NEXT: ret i64 sub (i64 ptrtoaddr (ptr @g to i64), i64 ptrtoaddr (ptr @g2 to i64)) ; - ret i64 sub (i64 ptrtoaddr (ptr @g to i64), i64 ptrtoaddr (ptr @g2 to i64)) + %sub = sub i64 ptrtoaddr (ptr @g to i64), ptrtoaddr (ptr @g2 to i64) + ret i64 %sub } define i64 @ptrtoaddr_sub_consts_offset() { ; CHECK-LABEL: define i64 @ptrtoaddr_sub_consts_offset() { ; CHECK-NEXT: ret i64 42 ; - ret i64 sub (i64 ptrtoaddr (ptr getelementptr (i8, ptr @g, i64 42) to i64), i64 ptrtoaddr (ptr @g to i64)) + %sub = sub i64 ptrtoaddr (ptr getelementptr (i8, ptr @g, i64 42) to i64), ptrtoaddr (ptr @g to i64) + ret i64 %sub } define i32 @ptrtoaddr_sub_consts_offset_addrsize() { ; CHECK-LABEL: define i32 @ptrtoaddr_sub_consts_offset_addrsize() { ; CHECK-NEXT: ret i32 42 ; - ret i32 sub (i32 ptrtoaddr (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) @g.as1, i32 42) to i32), i32 ptrtoaddr (ptr addrspace(1) @g.as1 to i32)) + %sub = sub i32 ptrtoaddr (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) @g.as1, i32 42) to i32), ptrtoaddr (ptr addrspace(1) @g.as1 to i32) + ret i32 %sub } define i64 @ptrtoaddr_sub_known_offset(ptr %p) { @@ -161,3 +183,27 @@ define i32 @ptrtoaddr_sub_known_offset_addrsize(ptr addrspace(1) %p) { %sub = sub i32 %p2.addr, %p.addr ret i32 %sub } + +define i64 @ptrtoaddr_of_ptradd_of_sub(i64 %x, ptr %p) { +; CHECK-LABEL: define i64 @ptrtoaddr_of_ptradd_of_sub( +; CHECK-SAME: i64 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: ret i64 [[X]] +; + %p.addr = ptrtoaddr ptr %p to i64 + %sub = sub i64 %x, %p.addr + %ptradd = getelementptr i8, ptr %p, i64 %sub + %ptradd.addr = ptrtoaddr ptr %ptradd to i64 + ret i64 %ptradd.addr +} + +define i32 @ptrtoaddr_of_ptradd_of_sub_addrsize(i32 %x, ptr addrspace(1) %p) { +; CHECK-LABEL: define i32 @ptrtoaddr_of_ptradd_of_sub_addrsize( +; CHECK-SAME: i32 [[X:%.*]], ptr addrspace(1) [[P:%.*]]) { +; CHECK-NEXT: ret i32 [[X]] +; + %p.addr = ptrtoaddr ptr addrspace(1) %p to i32 + %sub = sub i32 %x, %p.addr + %ptradd = getelementptr i8, ptr addrspace(1) %p, i32 %sub + %ptradd.addr = ptrtoaddr ptr addrspace(1) %ptradd to i32 + ret i32 %ptradd.addr +} diff --git a/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll b/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll index 3ee9cf8..ffdff21 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll @@ -124,18 +124,14 @@ define void @test_known_trip_count() { ; CHECK-NEXT: [[TMP27:%.*]] = fadd <2 x double> [[WIDE_LOAD3_13]], [[WIDE_LOAD5_13]] ; CHECK-NEXT: store <2 x double> [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @a, i64 416), align 16 ; CHECK-NEXT: store <2 x double> [[TMP27]], ptr getelementptr inbounds nuw (i8, ptr @a, i64 432), align 16 -; CHECK-NEXT: [[WIDE_LOAD_14:%.*]] = load <2 x double>, ptr getelementptr inbounds nuw (i8, ptr @b, i64 448), align 16 -; CHECK-NEXT: [[WIDE_LOAD3_14:%.*]] = load <2 x double>, ptr getelementptr inbounds nuw (i8, ptr @b, i64 464), align 16 -; CHECK-NEXT: [[WIDE_LOAD4_14:%.*]] = load <2 x double>, ptr getelementptr inbounds nuw (i8, ptr @c, i64 448), align 16 -; CHECK-NEXT: [[WIDE_LOAD5_14:%.*]] = load <2 x double>, ptr getelementptr inbounds nuw (i8, ptr @c, i64 464), align 16 -; CHECK-NEXT: [[TMP28:%.*]] = fadd <2 x double> [[WIDE_LOAD_14]], [[WIDE_LOAD4_14]] -; CHECK-NEXT: [[TMP29:%.*]] = fadd <2 x double> [[WIDE_LOAD3_14]], [[WIDE_LOAD5_14]] -; CHECK-NEXT: store <2 x double> [[TMP28]], ptr getelementptr inbounds nuw (i8, ptr @a, i64 448), align 16 -; CHECK-NEXT: store <2 x double> [[TMP29]], ptr getelementptr inbounds nuw (i8, ptr @a, i64 464), align 16 -; CHECK-NEXT: [[TMP30:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @b, i64 480), align 16 -; CHECK-NEXT: [[TMP31:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @c, i64 480), align 16 +; CHECK-NEXT: [[TMP30:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @b, i64 448), align 16 +; CHECK-NEXT: [[TMP31:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @c, i64 448), align 16 ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP30]], [[TMP31]] -; CHECK-NEXT: store double [[ADD]], ptr getelementptr inbounds nuw (i8, ptr @a, i64 480), align 16 +; CHECK-NEXT: store double [[ADD]], ptr getelementptr inbounds nuw (i8, ptr @a, i64 448), align 16 +; CHECK-NEXT: [[TMP32:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @b, i64 456), align 8 +; CHECK-NEXT: [[TMP33:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @c, i64 456), align 8 +; CHECK-NEXT: [[ADD_1:%.*]] = fadd double [[TMP32]], [[TMP33]] +; CHECK-NEXT: store double [[ADD_1]], ptr getelementptr inbounds nuw (i8, ptr @a, i64 456), align 8 ; CHECK-NEXT: ret void ; entry: @@ -143,7 +139,7 @@ entry: for.cond: %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %cmp = icmp slt i32 %i.0, 61 + %cmp = icmp slt i32 %i.0, 58 br i1 %cmp, label %for.body, label %exit for.body: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll new file mode 100644 index 0000000..d16843c --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +define ptr @test(ptr %d) { +; CHECK-LABEL: define ptr @test( +; CHECK-SAME: ptr [[D:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr null, align 1 +; CHECK-NEXT: [[CMP4_2:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[CMP4_2]], i64 0, i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 0, 0 +; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 1, 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <6 x i64> poison, i64 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <6 x i64> [[TMP5]], i64 [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <6 x i64> [[TMP6]], i64 [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <6 x i64> [[TMP7]], <6 x i64> poison, <6 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 4> +; CHECK-NEXT: [[TMP9:%.*]] = mul <6 x i64> [[TMP8]], <i64 2, i64 6, i64 1, i64 1, i64 1, i64 0> +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <6 x i64> [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <6 x i64> [[TMP9]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <6 x i64> [[TMP9]], i32 2 +; CHECK-NEXT: [[SCEVGEP42:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <6 x i64> [[TMP9]], i32 3 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <6 x i64> [[TMP9]], i32 4 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <6 x i64> [[TMP9]], i32 5 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP19]] +; CHECK-NEXT: ret ptr [[TMP20]] +; +entry: + %0 = load i8, ptr null, align 1 + %cmp4.2 = icmp eq i8 %0, 0 + %1 = select i1 %cmp4.2, i64 0, i64 0 + %2 = shl i64 %1, 1 + %3 = getelementptr i8, ptr %d, i64 %2 + %4 = xor i64 0, 0 + %5 = udiv i64 %4, 0 + %6 = mul i64 %5, 6 + %7 = getelementptr i8, ptr %d, i64 %6 + %8 = shl i64 %1, 0 + %scevgep42 = getelementptr i8, ptr %d, i64 %8 + %9 = mul i64 %5, 1 + %10 = getelementptr i8, ptr %d, i64 %9 + %11 = udiv i64 1, 0 + %12 = mul i64 %11, 1 + %13 = getelementptr i8, ptr %d, i64 %12 + %14 = mul i64 %11, 0 + %15 = getelementptr i8, ptr %d, i64 %14 + ret ptr %15 +} diff --git a/llvm/test/Transforms/SpeculativeExecution/spec-casts.ll b/llvm/test/Transforms/SpeculativeExecution/spec-casts.ll index c966bed..c7500cf 100644 --- a/llvm/test/Transforms/SpeculativeExecution/spec-casts.ll +++ b/llvm/test/Transforms/SpeculativeExecution/spec-casts.ll @@ -1,147 +1,234 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -S -passes=speculative-execution \ ; RUN: -spec-exec-max-speculation-cost 4 -spec-exec-max-not-hoisted 3 \ ; RUN: | FileCheck %s -; CHECK-LABEL: @ifThen_bitcast( -; CHECK: bitcast -; CHECK: br i1 true -define void @ifThen_bitcast() { +define void @ifThen_bitcast(i32 %arg) { +; CHECK-LABEL: define void @ifThen_bitcast( +; CHECK-SAME: i32 [[ARG:%.*]]) { +; CHECK-NEXT: [[X:%.*]] = bitcast i32 [[ARG]] to float +; CHECK-NEXT: br i1 true, label %[[A:.*]], label %[[B:.*]] +; CHECK: [[A]]: +; CHECK-NEXT: br label %[[B]] +; CHECK: [[B]]: +; CHECK-NEXT: ret void +; br i1 true, label %a, label %b a: - %x = bitcast i32 undef to float + %x = bitcast i32 %arg to float br label %b b: ret void } -; CHECK-LABEL: @ifThen_ptrtoint( -; CHECK: ptrtoint -; CHECK: br i1 true -define void @ifThen_ptrtoint() { +define void @ifThen_ptrtoint(ptr %arg) { +; CHECK-LABEL: define void @ifThen_ptrtoint( +; CHECK-SAME: ptr [[ARG:%.*]]) { +; CHECK-NEXT: [[X:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: br i1 true, label %[[A:.*]], label %[[B:.*]] +; CHECK: [[A]]: +; CHECK-NEXT: br label %[[B]] +; CHECK: [[B]]: +; CHECK-NEXT: ret void +; br i1 true, label %a, label %b a: - %x = ptrtoint ptr undef to i64 + %x = ptrtoint ptr %arg to i64 br label %b b: ret void } -; CHECK-LABEL: @ifThen_inttoptr( -; CHECK: inttoptr -; CHECK: br i1 true -define void @ifThen_inttoptr() { +define void @ifThen_ptrtoaddr(ptr %arg) { +; CHECK-LABEL: define void @ifThen_ptrtoaddr( +; CHECK-SAME: ptr [[ARG:%.*]]) { +; CHECK-NEXT: [[X:%.*]] = ptrtoaddr ptr [[ARG]] to i64 +; CHECK-NEXT: br i1 true, label %[[A:.*]], label %[[B:.*]] +; CHECK: [[A]]: +; CHECK-NEXT: br label %[[B]] +; CHECK: [[B]]: +; CHECK-NEXT: ret void +; br i1 true, label %a, label %b a: - %x = inttoptr i64 undef to ptr + %x = ptrtoaddr ptr %arg to i64 br label %b b: ret void } -; CHECK-LABEL: @ifThen_addrspacecast( -; CHECK: addrspacecast -; CHECK: br i1 true -define void @ifThen_addrspacecast() { +define void @ifThen_inttoptr(i64 %arg) { +; CHECK-LABEL: define void @ifThen_inttoptr( +; CHECK-SAME: i64 [[ARG:%.*]]) { +; CHECK-NEXT: [[X:%.*]] = inttoptr i64 [[ARG]] to ptr +; CHECK-NEXT: br i1 true, label %[[A:.*]], label %[[B:.*]] +; CHECK: [[A]]: +; CHECK-NEXT: br label %[[B]] +; CHECK: [[B]]: +; CHECK-NEXT: ret void +; + br i1 true, label %a, label %b + +a: + %x = inttoptr i64 %arg to ptr + br label %b + +b: + ret void +} + +define void @ifThen_addrspacecast(ptr %arg) { +; CHECK-LABEL: define void @ifThen_addrspacecast( +; CHECK-SAME: ptr [[ARG:%.*]]) { +; CHECK-NEXT: [[X:%.*]] = addrspacecast ptr [[ARG]] to ptr addrspace(1) +; CHECK-NEXT: br i1 true, label %[[A:.*]], label %[[B:.*]] +; CHECK: [[A]]: +; CHECK-NEXT: br label %[[B]] +; CHECK: [[B]]: +; CHECK-NEXT: ret void +; br i1 true, label %a, label %b a: - %x = addrspacecast ptr undef to ptr addrspace(1) + %x = addrspacecast ptr %arg to ptr addrspace(1) br label %b b: ret void } -; CHECK-LABEL: @ifThen_fptoui( -; CHECK: fptoui -; CHECK: br i1 true -define void @ifThen_fptoui() { +define void @ifThen_fptoui(float %arg) { +; CHECK-LABEL: define void @ifThen_fptoui( +; CHECK-SAME: float [[ARG:%.*]]) { +; CHECK-NEXT: [[X:%.*]] = fptoui float [[ARG]] to i32 +; CHECK-NEXT: br i1 true, label %[[A:.*]], label %[[B:.*]] +; CHECK: [[A]]: +; CHECK-NEXT: br label %[[B]] +; CHECK: [[B]]: +; CHECK-NEXT: ret void +; br i1 true, label %a, label %b a: - %x = fptoui float undef to i32 + %x = fptoui float %arg to i32 br label %b b: ret void } -; CHECK-LABEL: @ifThen_fptosi( -; CHECK: fptosi -; CHECK: br i1 true -define void @ifThen_fptosi() { +define void @ifThen_fptosi(float %arg) { +; CHECK-LABEL: define void @ifThen_fptosi( +; CHECK-SAME: float [[ARG:%.*]]) { +; CHECK-NEXT: [[X:%.*]] = fptosi float [[ARG]] to i32 +; CHECK-NEXT: br i1 true, label %[[A:.*]], label %[[B:.*]] +; CHECK: [[A]]: +; CHECK-NEXT: br label %[[B]] +; CHECK: [[B]]: +; CHECK-NEXT: ret void +; br i1 true, label %a, label %b a: - %x = fptosi float undef to i32 + %x = fptosi float %arg to i32 br label %b b: ret void } -; CHECK-LABEL: @ifThen_uitofp( -; CHECK: uitofp -; CHECK: br i1 true -define void @ifThen_uitofp() { +define void @ifThen_uitofp(i32 %arg) { +; CHECK-LABEL: define void @ifThen_uitofp( +; CHECK-SAME: i32 [[ARG:%.*]]) { +; CHECK-NEXT: [[X:%.*]] = uitofp i32 [[ARG]] to float +; CHECK-NEXT: br i1 true, label %[[A:.*]], label %[[B:.*]] +; CHECK: [[A]]: +; CHECK-NEXT: br label %[[B]] +; CHECK: [[B]]: +; CHECK-NEXT: ret void +; br i1 true, label %a, label %b a: - %x = uitofp i32 undef to float + %x = uitofp i32 %arg to float br label %b b: ret void } -; CHECK-LABEL: @ifThen_sitofp( -; CHECK: sitofp -; CHECK: br i1 true -define void @ifThen_sitofp() { +define void @ifThen_sitofp(i32 %arg) { +; CHECK-LABEL: define void @ifThen_sitofp( +; CHECK-SAME: i32 [[ARG:%.*]]) { +; CHECK-NEXT: [[X:%.*]] = sitofp i32 [[ARG]] to float +; CHECK-NEXT: br i1 true, label %[[A:.*]], label %[[B:.*]] +; CHECK: [[A]]: +; CHECK-NEXT: br label %[[B]] +; CHECK: [[B]]: +; CHECK-NEXT: ret void +; br i1 true, label %a, label %b a: - %x = sitofp i32 undef to float + %x = sitofp i32 %arg to float br label %b b: ret void } -; CHECK-LABEL: @ifThen_fpext( -; CHECK: fpext -; CHECK: br i1 true -define void @ifThen_fpext() { +define void @ifThen_fpext(float %arg) { +; CHECK-LABEL: define void @ifThen_fpext( +; CHECK-SAME: float [[ARG:%.*]]) { +; CHECK-NEXT: [[X:%.*]] = fpext float [[ARG]] to double +; CHECK-NEXT: br i1 true, label %[[A:.*]], label %[[B:.*]] +; CHECK: [[A]]: +; CHECK-NEXT: br label %[[B]] +; CHECK: [[B]]: +; CHECK-NEXT: ret void +; br i1 true, label %a, label %b a: - %x = fpext float undef to double + %x = fpext float %arg to double br label %b b: ret void } -; CHECK-LABEL: @ifThen_fptrunc( -; CHECK: fptrunc -; CHECK: br i1 true -define void @ifThen_fptrunc() { +define void @ifThen_fptrunc(double %arg) { +; CHECK-LABEL: define void @ifThen_fptrunc( +; CHECK-SAME: double [[ARG:%.*]]) { +; CHECK-NEXT: [[X:%.*]] = fptrunc double [[ARG]] to float +; CHECK-NEXT: br i1 true, label %[[A:.*]], label %[[B:.*]] +; CHECK: [[A]]: +; CHECK-NEXT: br label %[[B]] +; CHECK: [[B]]: +; CHECK-NEXT: ret void +; br i1 true, label %a, label %b a: - %x = fptrunc double undef to float + %x = fptrunc double %arg to float br label %b b: ret void } -; CHECK-LABEL: @ifThen_trunc( -; CHECK: trunc -; CHECK: br i1 true -define void @ifThen_trunc() { +define void @ifThen_trunc(i32 %arg) { +; CHECK-LABEL: define void @ifThen_trunc( +; CHECK-SAME: i32 [[ARG:%.*]]) { +; CHECK-NEXT: [[X:%.*]] = trunc i32 [[ARG]] to i16 +; CHECK-NEXT: br i1 true, label %[[A:.*]], label %[[B:.*]] +; CHECK: [[A]]: +; CHECK-NEXT: br label %[[B]] +; CHECK: [[B]]: +; CHECK-NEXT: ret void +; br i1 true, label %a, label %b a: - %x = trunc i32 undef to i16 + %x = trunc i32 %arg to i16 br label %b b: diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml index 42ef7f3..5f19a29 100644 --- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml @@ -4,6 +4,7 @@ # RUN: split-file %s %t # RUN: yaml2obj %t/merged_callsites.dSYM.yaml -o %t/merged_callsites.dSYM +# The object file is manually tampered with such that the LLVM_stmt_seq of function_bad_stmt_seq is 0xFFFFFFFF. # RUN: llvm-gsymutil --num-threads=1 --convert=%t/merged_callsites.dSYM --merged-functions --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_dSYM.gsym # RUN: llvm-gsymutil --num-threads=1 --convert=%t/merged_callsites.dSYM --merged-functions --dwarf-callsites -o %t/dwarf_call_sites_dSYM.gsym @@ -36,7 +37,13 @@ # CHECK-MERGED-CALLSITES: CallSites (by relative return offset): # CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function2_copy1] -# CHECK-MERGED-CALLSITES: FunctionInfo @ 0x[[#%x,MAIN:]]: [0x[[#%x,MAIN_START:]] - 0x[[#%x,MAIN_END:]]) "main" +# If we don't do anything, a function with bad LLVM_stmt_seq won't have any call site filter +# More importantly, the line number will be at the function definition. +# CHECK-MERGED-CALLSITES: FunctionInfo @ 0x[[#%x,BAD_STMT_SEQ:]]: [0x[[#%x,BAD_STMT_SEQ_START:]] - 0x[[#%x,BAD_STMT_SEQ_END:]]) "function_bad_stmt_seq" +# CHECK-MERGED-CALLSITES-NEXT: LineTable: +# CHECK-MERGED-CALLSITES-NEXT: 0x00000001000003b0 ./merged_funcs_test.cpp:65 + +# CHECK-MERGED-CALLSITES-NEXT: FunctionInfo @ 0x[[#%x,MAIN:]]: [0x[[#%x,MAIN_START:]] - 0x[[#%x,MAIN_END:]]) "main" # CHECK-MERGED-CALLSITES: CallSites (by relative return offset): # CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function1] # CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function2_copy2] @@ -44,16 +51,17 @@ # CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function3_copy2] # CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function2_copy1] - ### Check that we can correctly resove merged functions using callstacks: ### Resolve two callstacks containing merged functions. ### We use the value obtained from `CallSites:[FILTER]` to pass to the next call to `llvm-gsymutil` via `--merged-functions-filter`. ### The callstacks resolve differently based on the merged functions filter. -### 0x00000001000003d0 => 0x000000010000037c => 0x000000010000035c => 0x0000000100000340 -### 0x00000001000003e8 =========================> 0x000000010000035c => 0x0000000100000340 +### 0x00000001000003d8 => 0x000000010000037c => 0x000000010000035c => 0x0000000100000340 +### 0x00000001000003f0 =========================> 0x000000010000035c => 0x0000000100000340 +### +### We added a new function, for main function, +8 for line numbers, +0x8 for addresses. -# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x00000001000003d0 | FileCheck --check-prefix=CHECK-C1 %s -# CHECK-C1: 0x00000001000003d0: main + 32 @ ./merged_funcs_test.cpp:63 +# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x00000001000003d8 | FileCheck --check-prefix=CHECK-C1 %s +# CHECK-C1: 0x00000001000003d8: main + 32 @ ./merged_funcs_test.cpp:71 # CHECK-C1-NEXT: CallSites: function2_copy2 # RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000037c --merged-functions-filter="function2_copy2" | FileCheck --check-prefix=CHECK-C2 %s @@ -73,9 +81,9 @@ ### ---------------------------------------------------------------------------------------------------------------------------------- ### Resolve the 2nd call stack - the 2nd and 3rd addresses are the same but they resolve to a different function because of the filter -# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --address=0x00000001000003e8 --merged-functions | FileCheck --check-prefix=CHECK-C5 %s -# CHECK-C5: Found 1 function at address 0x00000001000003e8: -# CHECK-C5-NEXT: 0x00000001000003e8: main + 56 @ ./merged_funcs_test.cpp:64 +# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --address=0x00000001000003f0 --merged-functions | FileCheck --check-prefix=CHECK-C5 %s +# CHECK-C5: Found 1 function at address 0x00000001000003f0: +# CHECK-C5-NEXT: 0x00000001000003f0: main + 56 @ ./merged_funcs_test.cpp:72 # CHECK-C5-NEXT: CallSites: function3_copy2 # RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000035c --merged-functions-filter="function3_copy2" | FileCheck --check-prefix=CHECK-C6 %s @@ -87,6 +95,9 @@ # CHECK-C7: Found 1 function at address 0x0000000100000340: # CHECK-C7-NEXT: 0x0000000100000340: function4_copy2 + 8 @ ./merged_funcs_test.cpp:14 +# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --address=0x00000001000003b4 --merged-functions | FileCheck --check-prefix=CHECK-BAD %s +# CHECK-BAD: Found 1 function at address 0x00000001000003b4: +# CHECK-BAD-NEXT: 0x00000001000003b4: function_bad_stmt_seq + 4 @ ./merged_funcs_test.cpp:65 #--- merged_funcs_test.cpp #define ATTRIB extern "C" __attribute__((noinline)) @@ -148,6 +159,14 @@ ATTRIB int function1(int a) { return result; } +// Intentional multi-line function definition +ATTRIB +int function_bad_stmt_seq( + int a +) { + return a + 1; +} + int main() { int sum = 0; sum += function1(1); @@ -155,6 +174,7 @@ int main() { sum += function4_copy2(4); sum += function3_copy2(41); sum += function2_copy1(11); + sum += function_bad_stmt_seq(sum); return sum; } @@ -229,7 +249,7 @@ FileHeader: LoadCommands: - cmd: LC_UUID cmdsize: 24 - uuid: 4C4C441A-5555-3144-A124-E395C0E7AA96 + uuid: 4C4C44C9-5555-3144-A16C-82A90B2087B3 - cmd: LC_BUILD_VERSION cmdsize: 24 platform: 1 @@ -239,9 +259,9 @@ LoadCommands: - cmd: LC_SYMTAB cmdsize: 24 symoff: 4096 - nsyms: 10 - stroff: 4256 - strsize: 156 + nsyms: 11 + stroff: 4272 + strsize: 179 - cmd: LC_SEGMENT_64 cmdsize: 72 segname: __PAGEZERO @@ -268,7 +288,7 @@ LoadCommands: - sectname: __text segname: __TEXT addr: 0x100000338 - size: 208 + size: 228 offset: 0x0 align: 2 reloff: 0x0 @@ -277,7 +297,7 @@ LoadCommands: reserved1: 0x0 reserved2: 0x0 reserved3: 0x0 - content: CFFAEDFE0C000001000000000A00000008000000C005000000000000000000001B000000180000004C4C441A55553144A124E395C0E7AA9632000000180000000100000000000B0000000B00000000000200000018000000001000000A000000A01000009C00000019000000480000005F5F504147455A45524F00000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000019000000980000005F5F54455854000000000000000000000000000001000000 + content: CFFAEDFE0C000001000000000A00000008000000C005000000000000000000001B000000180000004C4C44C955553144A16C82A90B2087B332000000180000000100000000000B0000000B00000000000200000018000000001000000B000000B0100000B300000019000000480000005F5F504147455A45524F00000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000019000000980000005F5F544558540000000000000000000000000000010000000040000000000000000000000000000000000000 - cmd: LC_SEGMENT_64 cmdsize: 152 segname: __DATA @@ -308,7 +328,7 @@ LoadCommands: vmaddr: 4295000064 vmsize: 4096 fileoff: 4096 - filesize: 316 + filesize: 355 maxprot: 1 initprot: 1 nsects: 0 @@ -319,7 +339,7 @@ LoadCommands: vmaddr: 4295004160 vmsize: 4096 fileoff: 8192 - filesize: 3507 + filesize: 3884 maxprot: 7 initprot: 3 nsects: 11 @@ -328,7 +348,7 @@ LoadCommands: - sectname: __debug_line segname: __DWARF addr: 0x100009000 - size: 323 + size: 357 offset: 0x2000 align: 0 reloff: 0x0 @@ -339,9 +359,9 @@ LoadCommands: reserved3: 0x0 - sectname: __debug_aranges segname: __DWARF - addr: 0x100009143 + addr: 0x100009165 size: 48 - offset: 0x2143 + offset: 0x2165 align: 0 reloff: 0x0 nreloc: 0 @@ -351,9 +371,9 @@ LoadCommands: reserved3: 0x0 - sectname: __debug_loc segname: __DWARF - addr: 0x100009173 - size: 1026 - offset: 0x2173 + addr: 0x100009195 + size: 1083 + offset: 0x2195 align: 0 reloff: 0x0 nreloc: 0 @@ -361,12 +381,12 @@ LoadCommands: reserved1: 0x0 reserved2: 0x0 reserved3: 0x0 - content: 00000000000000000800000000000000010050080000000000000014000000000000000400A301509F0000000000000000000000000000000004000000000000000C0000000000000001005800000000000000000000000000000000080000000000000014000000000000000100500000000000000000000000000000000000000000000000000800000000000000010050080000000000000014000000000000000400A301509F0000000000000000000000000000000004000000000000000C0000000000000001005800000000000000000000000000000000080000000000000014000000000000000100500000000000000000000000000000000014000000000000002000000000000000010050200000000000000034000000000000000400A301509F00000000000000000000000000000000240000000000000034000000000000000100500000000000000000000000000000000014000000000000002000000000000000010050200000000000000034000000000000000400A301509F00000000000000000000000000000000240000000000000034000000000000000100500000000000000000000000000000000034000000000000004000000000000000010050400000000000000058000000000000000400A301509F000000000000000000000000000000003C0000000000000040000000000000000300707E9F000000000000000000000000000000004400000000000000580000000000000001005000000000000000000000000000000000340000000000000040000000000000000300707E9F00000000000000000000000000000000440000000000000058000000000000000100500000000000000000000000000000000034000000000000004000000000000000010050400000000000000058000000000000000400A301509F000000000000000000000000000000003C0000000000000040000000000000000300707E9F000000000000000000000000000000004400000000000000580000000000000001005000000000000000000000000000000000340000000000000040000000000000000300707E9F00000000000000000000000000000000440000000000000058000000000000000100500000000000000000000000000000000058000000000000006400000000000000010050640000000000000078000000000000000400A301509F00000000000000000000000000000000680000000000000078000000000000000100500000000000000000000000000000000084000000000000009000000000000000030011009F90000000000000009C000000000000000100639C00000000000000A800000000000000010064B800000000000000C00000000000000001006300000000000000000000000000000000 + content: 00000000000000000800000000000000010050080000000000000014000000000000000400A301509F0000000000000000000000000000000004000000000000000C0000000000000001005800000000000000000000000000000000080000000000000014000000000000000100500000000000000000000000000000000000000000000000000800000000000000010050080000000000000014000000000000000400A301509F0000000000000000000000000000000004000000000000000C0000000000000001005800000000000000000000000000000000080000000000000014000000000000000100500000000000000000000000000000000014000000000000002000000000000000010050200000000000000034000000000000000400A301509F00000000000000000000000000000000240000000000000034000000000000000100500000000000000000000000000000000014000000000000002000000000000000010050200000000000000034000000000000000400A301509F00000000000000000000000000000000240000000000000034000000000000000100500000000000000000000000000000000034000000000000004000000000000000010050400000000000000058000000000000000400A301509F000000000000000000000000000000003C0000000000000040000000000000000300707E9F000000000000000000000000000000004400000000000000580000000000000001005000000000000000000000000000000000340000000000000040000000000000000300707E9F00000000000000000000000000000000440000000000000058000000000000000100500000000000000000000000000000000034000000000000004000000000000000010050400000000000000058000000000000000400A301509F000000000000000000000000000000003C0000000000000040000000000000000300707E9F000000000000000000000000000000004400000000000000580000000000000001005000000000000000000000000000000000340000000000000040000000000000000300707E9F00000000000000000000000000000000440000000000000058000000000000000100500000000000000000000000000000000058000000000000006400000000000000010050640000000000000078000000000000000400A301509F00000000000000000000000000000000680000000000000078000000000000000100500000000000000000000000000000000078000000000000007C000000000000000100507C0000000000000080000000000000000400A301509F000000000000000000000000000000008C000000000000009800000000000000030011009F9800000000000000A400000000000000010063A400000000000000B000000000000000010064C000000000000000D40000000000000001006300000000000000000000000000000000 - sectname: __debug_info segname: __DWARF - addr: 0x100009575 - size: 923 - offset: 0x2575 + addr: 0x1000095D0 + size: 988 + offset: 0x25D0 align: 0 reloff: 0x0 nreloc: 0 @@ -376,9 +396,9 @@ LoadCommands: reserved3: 0x0 - sectname: __debug_frame segname: __DWARF - addr: 0x100009910 - size: 272 - offset: 0x2910 + addr: 0x1000099AC + size: 296 + offset: 0x29AC align: 0 reloff: 0x0 nreloc: 0 @@ -386,12 +406,12 @@ LoadCommands: reserved1: 0x0 reserved2: 0x0 reserved3: 0x0 - content: 14000000FFFFFFFF0400080001781E0C1F000000000000001400000000000000380300000100000014000000000000001400000000000000380300000100000014000000000000001C000000000000004C030000010000002000000000000000480C1D109E019D021C000000000000004C030000010000002000000000000000480C1D109E019D021C000000000000006C030000010000002400000000000000480C1D109E019D021C000000000000006C030000010000002400000000000000480C1D109E019D021C0000000000000090030000010000002000000000000000480C1D109E019D022400000000000000B00300000100000058000000000000004C0C1D109E019D029303940400000000 + content: 14000000FFFFFFFF0400080001781E0C1F000000000000001400000000000000380300000100000014000000000000001400000000000000380300000100000014000000000000001C000000000000004C030000010000002000000000000000480C1D109E019D021C000000000000004C030000010000002000000000000000480C1D109E019D021C000000000000006C030000010000002400000000000000480C1D109E019D021C000000000000006C030000010000002400000000000000480C1D109E019D021C0000000000000090030000010000002000000000000000480C1D109E019D021400000000000000B00300000100000008000000000000002400000000000000B80300000100000064000000000000004C0C1D109E019D029303940400000000 - sectname: __debug_abbrev segname: __DWARF - addr: 0x100009A20 + addr: 0x100009AD4 size: 260 - offset: 0x2A20 + offset: 0x2AD4 align: 0 reloff: 0x0 nreloc: 0 @@ -401,9 +421,9 @@ LoadCommands: reserved3: 0x0 - sectname: __debug_str segname: __DWARF - addr: 0x100009B24 - size: 188 - offset: 0x2B24 + addr: 0x100009BD8 + size: 357 + offset: 0x2BD8 align: 0 reloff: 0x0 nreloc: 0 @@ -413,9 +433,9 @@ LoadCommands: reserved3: 0x0 - sectname: __apple_namespac segname: __DWARF - addr: 0x100009BE0 + addr: 0x100009D3D size: 36 - offset: 0x2BE0 + offset: 0x2D3D align: 0 reloff: 0x0 nreloc: 0 @@ -426,9 +446,9 @@ LoadCommands: content: 485341480100000001000000000000000C000000000000000100000001000600FFFFFFFF - sectname: __apple_names segname: __DWARF - addr: 0x100009C04 - size: 316 - offset: 0x2C04 + addr: 0x100009D61 + size: 344 + offset: 0x2D61 align: 0 reloff: 0x0 nreloc: 0 @@ -436,12 +456,12 @@ LoadCommands: reserved1: 0x0 reserved2: 0x0 reserved3: 0x0 - content: 48534148010000000A0000000A0000000C0000000000000001000000010006000000000002000000030000000400000006000000FFFFFFFF0800000009000000FFFFFFFFFFFFFFFF88CB36CFF4B03BD389CB36CF0A452B694908311C0B452B694A08311CDC41AB586A7F9A7CAD7ED75898000000A8000000B8000000C8000000D8000000E8000000F80000000801000018010000280100008900000001000000BB010000000000001B000000010000002E0000000000000099000000010000003A020000000000002D000000010000004F000000000000005800000001000000E50000000000000048000000010000009A0000000000000068000000010000003901000000000000A900000001000000B902000000000000B3000000010000000D030000000000007800000002000000050200008402000000000000 + content: 48534148010000000B0000000B0000000C0000000000000001000000010006000000000002000000FFFFFFFFFFFFFFFF03000000FFFFFFFFFFFFFFFF04000000080000000A000000FFFFFFFFAD7ED75888CB36CF89CB36CFF4B03BD34908311CDC41AB580A452B696A7F9A7C4A08311C0B452B69404C8F68A4000000B8000000C8000000D8000000E8000000F800000008010000180100002801000038010000480100000B010000020000000502000084020000000000001C01000001000000BB010000000000002C010000010000003A02000000000000AE000000010000002E00000000000000EB00000001000000E5000000000000003C01000001000000B902000000000000C0000000010000004F000000000000005C010000010000003A03000000000000FB000000010000003901000000000000DB000000010000009A0000000000000046010000010000000D03000000000000 - sectname: __apple_types segname: __DWARF - addr: 0x100009D40 + addr: 0x100009EB9 size: 79 - offset: 0x2D40 + offset: 0x2EB9 align: 0 reloff: 0x0 nreloc: 0 @@ -449,12 +469,12 @@ LoadCommands: reserved1: 0x0 reserved2: 0x0 reserved3: 0x0 - content: 48534148010000000100000001000000180000000000000004000000010006000300050005000B0006000600000000003080880B38000000290000000100000048000000240000A4283A0C00000000 + content: 48534148010000000100000001000000180000000000000004000000010006000300050005000B0006000600000000003080880B38000000BC0000000100000048000000240000A4283A0C00000000 - sectname: __apple_objc segname: __DWARF - addr: 0x100009D8F + addr: 0x100009F08 size: 36 - offset: 0x2D8F + offset: 0x2F08 align: 0 reloff: 0x0 nreloc: 0 @@ -469,7 +489,7 @@ LinkEditData: n_type: 0xF n_sect: 1 n_desc: 0 - n_value: 4294968240 + n_value: 4294968248 - n_strx: 8 n_type: 0xF n_sect: 1 @@ -507,10 +527,15 @@ LinkEditData: n_value: 4294968208 - n_strx: 121 n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294968240 + - n_strx: 144 + n_type: 0xF n_sect: 2 n_desc: 0 n_value: 4294983680 - - n_strx: 136 + - n_strx: 159 n_type: 0xF n_sect: 1 n_desc: 16 @@ -526,11 +551,13 @@ LinkEditData: - _function2_copy1 - _function2_copy2 - _function1 + - _function_bad_stmt_seq - _global_result - __mh_execute_header DWARF: debug_str: - '' + - 'Facebook clang version 15.82.1 (https://git.internal.tfbnw.net/repos/git/ro/osmeta/external/llvm-project 3c2ff7d3c6ef63fb9a83574cee1b376c969bd5ec)' - merged_funcs_test.cpp - '/' - . @@ -547,6 +574,7 @@ DWARF: - function2_copy1 - function2_copy2 - function1 + - function_bad_stmt_seq - main - sum debug_abbrev: @@ -793,9 +821,9 @@ DWARF: AddressSize: 0x8 Descriptors: - Address: 0x100000338 - Length: 0xD0 + Length: 0xE4 debug_info: - - Length: 0x397 + - Length: 0x3D8 Version: 4 AbbrevTableID: 0 AbbrOffset: 0x0 @@ -803,31 +831,31 @@ DWARF: Entries: - AbbrCode: 0x1 Values: - - Value: 0x0 - - Value: 0x21 - Value: 0x1 - - Value: 0x17 + - Value: 0x21 + - Value: 0x94 + - Value: 0xAA - Value: 0x0 - - Value: 0x19 + - Value: 0xAC - Value: 0x1 - Value: 0x100000338 - - Value: 0xD0 + - Value: 0xE4 - AbbrCode: 0x2 Values: - - Value: 0x1B + - Value: 0xAE - Value: 0x43 - Value: 0x1 - Value: 0x1 - Value: 0x2 - Value: 0x9 - BlockData: [ 0x3, 0x0, 0x40, 0x0, 0x0, 0x1, 0x0, 0x0, + BlockData: [ 0x3, 0x0, 0x40, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0 ] - AbbrCode: 0x3 Values: - Value: 0x48 - AbbrCode: 0x4 Values: - - Value: 0x29 + - Value: 0xBC - Value: 0x5 - Value: 0x4 - AbbrCode: 0x5 @@ -839,7 +867,7 @@ DWARF: - Value: 0x1 BlockData: [ 0x6F ] - Value: 0x1 - - Value: 0x2D + - Value: 0xC0 - Value: 0x1 - Value: 0x4 - Value: 0x48 @@ -848,21 +876,21 @@ DWARF: - AbbrCode: 0x6 Values: - Value: 0x0 - - Value: 0x3D + - Value: 0xD0 - Value: 0x1 - Value: 0x4 - Value: 0x48 - AbbrCode: 0x7 Values: - Value: 0x39 - - Value: 0x3F + - Value: 0xD2 - Value: 0x1 - Value: 0x5 - Value: 0x48 - AbbrCode: 0x7 Values: - Value: 0x5C - - Value: 0x41 + - Value: 0xD4 - Value: 0x1 - Value: 0x6 - Value: 0x48 @@ -876,7 +904,7 @@ DWARF: - Value: 0x1 BlockData: [ 0x6F ] - Value: 0x1 - - Value: 0x48 + - Value: 0xDB - Value: 0x1 - Value: 0xB - Value: 0x48 @@ -885,21 +913,21 @@ DWARF: - AbbrCode: 0x6 Values: - Value: 0x7F - - Value: 0x3D + - Value: 0xD0 - Value: 0x1 - Value: 0xB - Value: 0x48 - AbbrCode: 0x7 Values: - Value: 0xB8 - - Value: 0x3F + - Value: 0xD2 - Value: 0x1 - Value: 0xC - Value: 0x48 - AbbrCode: 0x7 Values: - Value: 0xDB - - Value: 0x41 + - Value: 0xD4 - Value: 0x1 - Value: 0xD - Value: 0x48 @@ -912,7 +940,7 @@ DWARF: - Value: 0x1 BlockData: [ 0x6D ] - Value: 0x1 - - Value: 0x58 + - Value: 0xEB - Value: 0x1 - Value: 0x12 - Value: 0x48 @@ -921,20 +949,20 @@ DWARF: - AbbrCode: 0x6 Values: - Value: 0xFE - - Value: 0x3D + - Value: 0xD0 - Value: 0x1 - Value: 0x12 - Value: 0x48 - AbbrCode: 0x7 Values: - Value: 0x137 - - Value: 0x41 + - Value: 0xD4 - Value: 0x1 - Value: 0x14 - Value: 0x48 - AbbrCode: 0x9 Values: - - Value: 0x3F + - Value: 0xD2 - Value: 0x1 - Value: 0x13 - Value: 0x48 @@ -951,7 +979,7 @@ DWARF: - Value: 0x1 BlockData: [ 0x6D ] - Value: 0x1 - - Value: 0x68 + - Value: 0xFB - Value: 0x1 - Value: 0x19 - Value: 0x48 @@ -960,20 +988,20 @@ DWARF: - AbbrCode: 0x6 Values: - Value: 0x15A - - Value: 0x3D + - Value: 0xD0 - Value: 0x1 - Value: 0x19 - Value: 0x48 - AbbrCode: 0x7 Values: - Value: 0x193 - - Value: 0x41 + - Value: 0xD4 - Value: 0x1 - Value: 0x1B - Value: 0x48 - AbbrCode: 0x9 Values: - - Value: 0x3F + - Value: 0xD2 - Value: 0x1 - Value: 0x1A - Value: 0x48 @@ -984,7 +1012,7 @@ DWARF: - AbbrCode: 0x0 - AbbrCode: 0xB Values: - - Value: 0x78 + - Value: 0x10B - Value: 0x1 - Value: 0x20 - Value: 0x48 @@ -993,19 +1021,19 @@ DWARF: - Value: 0x1 - AbbrCode: 0xC Values: - - Value: 0x3D + - Value: 0xD0 - Value: 0x1 - Value: 0x20 - Value: 0x48 - AbbrCode: 0x9 Values: - - Value: 0x41 + - Value: 0xD4 - Value: 0x1 - Value: 0x22 - Value: 0x48 - AbbrCode: 0x9 Values: - - Value: 0x3F + - Value: 0xD2 - Value: 0x1 - Value: 0x21 - Value: 0x48 @@ -1018,7 +1046,7 @@ DWARF: - Value: 0x1 BlockData: [ 0x6D ] - Value: 0x1 - - Value: 0x89 + - Value: 0x11C - Value: 0x1 - Value: 0x27 - Value: 0x48 @@ -1027,21 +1055,21 @@ DWARF: - AbbrCode: 0x6 Values: - Value: 0x1B6 - - Value: 0x3D + - Value: 0xD0 - Value: 0x1 - Value: 0x27 - Value: 0x48 - AbbrCode: 0x7 Values: - Value: 0x1EF - - Value: 0x3F + - Value: 0xD2 - Value: 0x1 - Value: 0x28 - Value: 0x48 - AbbrCode: 0x7 Values: - Value: 0x214 - - Value: 0x41 + - Value: 0xD4 - Value: 0x1 - Value: 0x29 - Value: 0x48 @@ -1075,7 +1103,7 @@ DWARF: - Value: 0x1 BlockData: [ 0x6D ] - Value: 0x1 - - Value: 0x99 + - Value: 0x12C - Value: 0x1 - Value: 0x2E - Value: 0x48 @@ -1084,21 +1112,21 @@ DWARF: - AbbrCode: 0x6 Values: - Value: 0x27F - - Value: 0x3D + - Value: 0xD0 - Value: 0x1 - Value: 0x2E - Value: 0x48 - AbbrCode: 0x7 Values: - Value: 0x2B8 - - Value: 0x3F + - Value: 0xD2 - Value: 0x1 - Value: 0x2F - Value: 0x48 - AbbrCode: 0x7 Values: - Value: 0x2DD - - Value: 0x41 + - Value: 0xD4 - Value: 0x1 - Value: 0x30 - Value: 0x48 @@ -1132,7 +1160,7 @@ DWARF: - Value: 0x1 BlockData: [ 0x6D ] - Value: 0x1 - - Value: 0xA9 + - Value: 0x13C - Value: 0x1 - Value: 0x35 - Value: 0x48 @@ -1141,20 +1169,20 @@ DWARF: - AbbrCode: 0x6 Values: - Value: 0x348 - - Value: 0x3D + - Value: 0xD0 - Value: 0x1 - Value: 0x35 - Value: 0x48 - AbbrCode: 0x7 Values: - Value: 0x381 - - Value: 0x41 + - Value: 0xD4 - Value: 0x1 - Value: 0x37 - Value: 0x48 - AbbrCode: 0x9 Values: - - Value: 0x3F + - Value: 0xD2 - Value: 0x1 - Value: 0x36 - Value: 0x48 @@ -1163,31 +1191,54 @@ DWARF: - Value: 0x1BB - Value: 0x1000003A0 - AbbrCode: 0x0 - - AbbrCode: 0x8 + - AbbrCode: 0x5 Values: - Value: 0x1000003B0 - - Value: 0x58 - - Value: 0x112 + - Value: 0x8 + - Value: 0x1 + - Value: 0xFFFFFFFF + - Value: 0x1 + BlockData: [ 0x6F ] + - Value: 0x1 + - Value: 0x146 + - Value: 0x1 + - Value: 0x3E + - Value: 0x48 + - Value: 0x1 + - Value: 0x1 + - AbbrCode: 0x6 + Values: + - Value: 0x3A4 + - Value: 0xD0 + - Value: 0x1 + - Value: 0x3F + - Value: 0x48 + - AbbrCode: 0x0 + - AbbrCode: 0x8 + Values: + - Value: 0x1000003B8 + - Value: 0x64 + - Value: 0x12B - Value: 0x1 BlockData: [ 0x6D ] - Value: 0x1 - - Value: 0xB3 + - Value: 0x15C - Value: 0x1 - - Value: 0x3C + - Value: 0x44 - Value: 0x48 - Value: 0x1 - Value: 0x1 - AbbrCode: 0x7 Values: - - Value: 0x3A4 - - Value: 0xB8 + - Value: 0x3DD + - Value: 0x161 - Value: 0x1 - - Value: 0x3D + - Value: 0x45 - Value: 0x48 - AbbrCode: 0x10 Values: - Value: 0x2B9 - - Value: 0x1000003C4 + - Value: 0x1000003CC - AbbrCode: 0x11 Values: - Value: 0x1 @@ -1198,7 +1249,7 @@ DWARF: - AbbrCode: 0x10 Values: - Value: 0x23A - - Value: 0x1000003D0 + - Value: 0x1000003D8 - AbbrCode: 0x11 Values: - Value: 0x1 @@ -1209,7 +1260,7 @@ DWARF: - AbbrCode: 0x10 Values: - Value: 0x9A - - Value: 0x1000003DC + - Value: 0x1000003E4 - AbbrCode: 0x11 Values: - Value: 0x1 @@ -1220,7 +1271,7 @@ DWARF: - AbbrCode: 0x10 Values: - Value: 0x139 - - Value: 0x1000003E8 + - Value: 0x1000003F0 - AbbrCode: 0x11 Values: - Value: 0x1 @@ -1231,7 +1282,7 @@ DWARF: - AbbrCode: 0x10 Values: - Value: 0x1BB - - Value: 0x1000003F8 + - Value: 0x100000400 - AbbrCode: 0x11 Values: - Value: 0x1 @@ -1239,10 +1290,21 @@ DWARF: - Value: 0x1 BlockData: [ 0x3B ] - AbbrCode: 0x0 + - AbbrCode: 0x10 + Values: + - Value: 0x30D + - Value: 0x10000040C + - AbbrCode: 0x11 + Values: + - Value: 0x1 + BlockData: [ 0x50 ] + - Value: 0x2 + BlockData: [ 0x83, 0x0 ] + - AbbrCode: 0x0 - AbbrCode: 0x0 - AbbrCode: 0x0 debug_line: - - Length: 319 + - Length: 353 Version: 4 PrologueLength: 45 MinInstLength: 1 @@ -1495,8 +1557,31 @@ DWARF: ExtLen: 9 SubOpcode: DW_LNE_set_address Data: 4294968240 + - Opcode: DW_LNS_set_column + Data: 14 + - Opcode: DW_LNS_set_prologue_end + Data: 0 - Opcode: DW_LNS_advance_line - SData: 59 + SData: 64 + Data: 0 + - Opcode: DW_LNS_copy + Data: 0 + - Opcode: DW_LNS_set_column + Data: 5 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: 0x4A + Data: 0 + - Opcode: DW_LNS_extended_op + ExtLen: 1 + SubOpcode: DW_LNE_end_sequence + Data: 0 + - Opcode: DW_LNS_extended_op + ExtLen: 9 + SubOpcode: DW_LNE_set_address + Data: 4294968248 + - Opcode: DW_LNS_advance_line + SData: 67 Data: 0 - Opcode: DW_LNS_copy Data: 0 @@ -1539,6 +1624,18 @@ DWARF: - Opcode: 0x82 Data: 0 - Opcode: DW_LNS_set_column + Data: 12 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: 0x4B + Data: 0 + - Opcode: DW_LNS_set_column + Data: 9 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: 0x82 + Data: 0 + - Opcode: DW_LNS_set_column Data: 5 - Opcode: DW_LNS_negate_stmt Data: 0 diff --git a/llvm/tools/llvm-config/llvm-config.cpp b/llvm/tools/llvm-config/llvm-config.cpp index 7f8c55a..020b1b5 100644 --- a/llvm/tools/llvm-config/llvm-config.cpp +++ b/llvm/tools/llvm-config/llvm-config.cpp @@ -76,7 +76,7 @@ enum LinkMode { /// libraries. /// \param GetComponentNames - Get the component names instead of the /// library name. -static void VisitComponent(const std::string &Name, +static void visitComponent(const std::string &Name, const StringMap<AvailableComponent *> &ComponentMap, std::set<AvailableComponent *> &VisitedComponents, std::vector<std::string> &RequiredLibs, @@ -89,9 +89,8 @@ static void VisitComponent(const std::string &Name, AvailableComponent *AC = ComponentMap.lookup(Name); if (!AC) { errs() << "Can't find component: '" << Name << "' in the map. Available components are: "; - for (const auto &Component : ComponentMap) { + for (const auto &Component : ComponentMap) errs() << "'" << Component.first() << "' "; - } errs() << "\n"; report_fatal_error("abort"); } @@ -108,9 +107,11 @@ static void VisitComponent(const std::string &Name, return; // Otherwise, visit all the dependencies. - for (unsigned i = 0; AC->RequiredLibraries[i]; ++i) { - VisitComponent(AC->RequiredLibraries[i], ComponentMap, VisitedComponents, - RequiredLibs, IncludeNonInstalled, GetComponentNames, + for (const char *Lib : AC->RequiredLibraries) { + if (!Lib) + break; + visitComponent(Lib, ComponentMap, VisitedComponents, RequiredLibs, + IncludeNonInstalled, GetComponentNames, GetComponentLibraryPath, Missing, DirSep); } @@ -118,17 +119,17 @@ static void VisitComponent(const std::string &Name, // not populated by llvm-build, but later in the process and loaded from // ExtensionDependencies.inc. if (Name == "extensions") { - for (auto const &AvailableExtension : AvailableExtensions) { - for (const char *const *Iter = &AvailableExtension.RequiredLibraries[0]; - *Iter; ++Iter) { - AvailableComponent *AC = ComponentMap.lookup(*Iter); - if (!AC) { - RequiredLibs.push_back(*Iter); - } else { - VisitComponent(*Iter, ComponentMap, VisitedComponents, RequiredLibs, + for (const ExtensionDescriptor &AvailableExtension : AvailableExtensions) { + for (const char *Lib : AvailableExtension.RequiredLibraries) { + if (!Lib) + break; + AvailableComponent *AC = ComponentMap.lookup(Lib); + if (!AC) + RequiredLibs.push_back(Lib); + else + visitComponent(Lib, ComponentMap, VisitedComponents, RequiredLibs, IncludeNonInstalled, GetComponentNames, GetComponentLibraryPath, Missing, DirSep); - } } } } @@ -159,11 +160,13 @@ static void VisitComponent(const std::string &Name, /// \param IncludeNonInstalled - Whether non-installed components should be /// reported. /// \param GetComponentNames - True if one would prefer the component names. -static std::vector<std::string> ComputeLibsForComponents( - const std::vector<StringRef> &Components, bool IncludeNonInstalled, - bool GetComponentNames, const std::function<std::string(const StringRef &)> - *GetComponentLibraryPath, - std::vector<std::string> *Missing, const std::string &DirSep) { +static std::vector<std::string> +computeLibsForComponents(ArrayRef<StringRef> Components, + bool IncludeNonInstalled, bool GetComponentNames, + const std::function<std::string(const StringRef &)> + *GetComponentLibraryPath, + std::vector<std::string> *Missing, + const std::string &DirSep) { std::vector<std::string> RequiredLibs; std::set<AvailableComponent *> VisitedComponents; @@ -173,18 +176,17 @@ static std::vector<std::string> ComputeLibsForComponents( ComponentMap[AC.Name] = &AC; // Visit the components. - for (unsigned i = 0, e = Components.size(); i != e; ++i) { + for (StringRef Component : Components) { // Users are allowed to provide mixed case component names. - std::string ComponentLower = Components[i].lower(); + std::string ComponentLower = Component.lower(); // Validate that the user supplied a valid component name. if (!ComponentMap.count(ComponentLower)) { - llvm::errs() << "llvm-config: unknown component name: " << Components[i] - << "\n"; + errs() << "llvm-config: unknown component name: " << Component << "\n"; exit(1); } - VisitComponent(ComponentLower, ComponentMap, VisitedComponents, + visitComponent(ComponentLower, ComponentMap, VisitedComponents, RequiredLibs, IncludeNonInstalled, GetComponentNames, GetComponentLibraryPath, Missing, DirSep); } @@ -196,8 +198,6 @@ static std::vector<std::string> ComputeLibsForComponents( return RequiredLibs; } -/* *** */ - static void usage(bool ExitWithFailure = true) { errs() << "\ usage: llvm-config <OPTION>... [<COMPONENT>...]\n\ @@ -244,18 +244,18 @@ Typical components:\n\ } /// Compute the path to the main executable. -std::string GetExecutablePath(const char *Argv0) { +static std::string getExecutablePath(const char *Argv0) { // This just needs to be some symbol in the binary; C++ doesn't // allow taking the address of ::main however. - void *P = (void *)(intptr_t)GetExecutablePath; - return llvm::sys::fs::getMainExecutable(Argv0, P); + void *P = (void *)(intptr_t)getExecutablePath; + return sys::fs::getMainExecutable(Argv0, P); } /// Expand the semi-colon delimited LLVM_DYLIB_COMPONENTS into /// the full list of components. -std::vector<std::string> GetAllDyLibComponents(const bool IsInDevelopmentTree, - const bool GetComponentNames, - const std::string &DirSep) { +static std::vector<std::string> +getAllDyLibComponents(const bool IsInDevelopmentTree, + const bool GetComponentNames, const std::string &DirSep) { std::vector<StringRef> DyLibComponents; StringRef DyLibComponentsStr(LLVM_DYLIB_COMPONENTS); @@ -263,15 +263,14 @@ std::vector<std::string> GetAllDyLibComponents(const bool IsInDevelopmentTree, while (true) { const size_t NextOffset = DyLibComponentsStr.find(';', Offset); DyLibComponents.push_back(DyLibComponentsStr.substr(Offset, NextOffset-Offset)); - if (NextOffset == std::string::npos) { + if (NextOffset == std::string::npos) break; - } Offset = NextOffset + 1; } assert(!DyLibComponents.empty()); - return ComputeLibsForComponents(DyLibComponents, + return computeLibsForComponents(DyLibComponents, /*IncludeNonInstalled=*/IsInDevelopmentTree, GetComponentNames, nullptr, nullptr, DirSep); } @@ -288,7 +287,7 @@ int main(int argc, char **argv) { // tree. bool IsInDevelopmentTree; enum { CMakeStyle, CMakeBuildModeStyle } DevelopmentTreeLayout; - llvm::SmallString<256> CurrentPath(GetExecutablePath(argv[0])); + SmallString<256> CurrentPath(getExecutablePath(argv[0])); std::string CurrentExecPrefix; std::string ActiveObjRoot; @@ -453,13 +452,12 @@ int main(int argc, char **argv) { StringRef &Out) { if (Lib.starts_with(StaticPrefix) || Lib.starts_with(SharedPrefix)) { unsigned FromEnd; - if (Lib.ends_with(StaticExt)) { + if (Lib.ends_with(StaticExt)) FromEnd = StaticExt.size() + 1; - } else if (Lib.ends_with(SharedExt)) { + else if (Lib.ends_with(SharedExt)) FromEnd = SharedExt.size() + 1; - } else { + else FromEnd = 0; - } if (FromEnd != 0) { unsigned FromStart = Lib.starts_with(SharedPrefix) @@ -496,11 +494,10 @@ int main(int argc, char **argv) { /// Get the full path for a possibly shared component library. auto GetComponentLibraryPath = [&](const StringRef &Name, const bool Shared) { auto LibFileName = GetComponentLibraryFileName(Name, Shared); - if (Shared) { + if (Shared) return (SharedDir + DirSep + LibFileName).str(); - } else { + else return (StaticDir + DirSep + LibFileName).str(); - } }; raw_ostream &OS = outs(); @@ -555,20 +552,14 @@ int main(int argc, char **argv) { llvm::replace(path, '/', '\\'); if (DyLibExists && !sys::fs::exists(path)) { Components = - GetAllDyLibComponents(IsInDevelopmentTree, true, DirSep); + getAllDyLibComponents(IsInDevelopmentTree, true, DirSep); llvm::sort(Components); break; } } } - for (unsigned I = 0; I < Components.size(); ++I) { - if (I) { - OS << ' '; - } - - OS << Components[I]; - } + interleave(Components, OS, " "); OS << '\n'; } else if (Arg == "--targets-built") { OS << LLVM_TARGETS_BUILT << '\n'; @@ -633,7 +624,7 @@ int main(int argc, char **argv) { return GetComponentLibraryPath(Name, LinkMode == LinkModeShared); }; std::vector<std::string> MissingLibs; - std::vector<std::string> RequiredLibs = ComputeLibsForComponents( + std::vector<std::string> RequiredLibs = computeLibsForComponents( Components, /*IncludeNonInstalled=*/IsInDevelopmentTree, false, &GetComponentLibraryPathFunction, &MissingLibs, DirSep); @@ -666,11 +657,10 @@ int main(int argc, char **argv) { if (PrintSharedMode) { std::unordered_set<std::string> FullDyLibComponents; std::vector<std::string> DyLibComponents = - GetAllDyLibComponents(IsInDevelopmentTree, false, DirSep); + getAllDyLibComponents(IsInDevelopmentTree, false, DirSep); - for (auto &Component : DyLibComponents) { + for (auto &Component : DyLibComponents) FullDyLibComponents.insert(Component); - } DyLibComponents.clear(); for (auto &Lib : RequiredLibs) { @@ -681,13 +671,11 @@ int main(int argc, char **argv) { } FullDyLibComponents.clear(); - if (LinkMode == LinkModeShared) { + if (LinkMode == LinkModeShared) OS << "shared\n"; - return 0; - } else { + else OS << "static\n"; - return 0; - } + return 0; } if (PrintLibs || PrintLibNames || PrintLibFiles) { @@ -716,17 +704,10 @@ int main(int argc, char **argv) { } }; - if (LinkMode == LinkModeShared && LinkDyLib) { + if (LinkMode == LinkModeShared && LinkDyLib) PrintForLib(DyLibName); - } else { - for (unsigned i = 0, e = RequiredLibs.size(); i != e; ++i) { - auto Lib = RequiredLibs[i]; - if (i) - OS << ' '; - - PrintForLib(Lib); - } - } + else + interleave(RequiredLibs, OS, PrintForLib, " "); OS << '\n'; } diff --git a/llvm/tools/llvm-xray/func-id-helper.h b/llvm/tools/llvm-xray/func-id-helper.h index d99fb7c..d1a6628 100644 --- a/llvm/tools/llvm-xray/func-id-helper.h +++ b/llvm/tools/llvm-xray/func-id-helper.h @@ -17,8 +17,7 @@ #include "llvm/DebugInfo/Symbolize/Symbolize.h" #include <unordered_map> -namespace llvm { -namespace xray { +namespace llvm::xray { // This class consolidates common operations related to Function IDs. class FuncIdConversionHelper { @@ -45,7 +44,6 @@ public: std::string FileLineAndColumn(int32_t FuncId) const; }; -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif // LLVM_TOOLS_LLVM_XRAY_FUNC_ID_HELPER_H diff --git a/llvm/tools/llvm-xray/trie-node.h b/llvm/tools/llvm-xray/trie-node.h index 7bff814..b42b029 100644 --- a/llvm/tools/llvm-xray/trie-node.h +++ b/llvm/tools/llvm-xray/trie-node.h @@ -21,6 +21,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +namespace llvm { /// A type to represent a trie of invocations. It is useful to construct a /// graph of these nodes from reading an XRay trace, such that each function /// call can be placed in a larger context. @@ -87,5 +88,6 @@ mergeTrieNodes(const TrieNode<T> &Left, const TrieNode<T> &Right, return Node; } +} // namespace llvm #endif // LLVM_TOOLS_LLVM_XRAY_STACK_TRIE_H diff --git a/llvm/tools/llvm-xray/xray-account.cpp b/llvm/tools/llvm-xray/xray-account.cpp index 24a3552..5168aa1 100644 --- a/llvm/tools/llvm-xray/xray-account.cpp +++ b/llvm/tools/llvm-xray/xray-account.cpp @@ -118,18 +118,16 @@ static cl::opt<std::string> static cl::alias AccountInstrMap2("m", cl::aliasopt(AccountInstrMap), cl::desc("Alias for -instr_map")); -namespace { - -template <class T, class U> void setMinMax(std::pair<T, T> &MM, U &&V) { +template <class T, class U> static void setMinMax(std::pair<T, T> &MM, U &&V) { if (MM.first == 0 || MM.second == 0) MM = std::make_pair(std::forward<U>(V), std::forward<U>(V)); else MM = std::make_pair(std::min(MM.first, V), std::max(MM.second, V)); } -template <class T> T diff(T L, T R) { return std::max(L, R) - std::min(L, R); } - -} // namespace +template <class T> static T diff(T L, T R) { + return std::max(L, R) - std::min(L, R); +} using RecursionStatus = LatencyAccountant::FunctionStack::RecursionStatus; RecursionStatus &RecursionStatus::operator++() { @@ -143,6 +141,7 @@ RecursionStatus &RecursionStatus::operator++() { true); // Storage |= INT_MIN return *this; } + RecursionStatus &RecursionStatus::operator--() { auto Depth = Bitfield::get<RecursionStatus::Depth>(Storage); assert(Depth > 0); @@ -153,6 +152,7 @@ RecursionStatus &RecursionStatus::operator--() { Bitfield::set<RecursionStatus::IsRecursive>(Storage, false); // Storage = 0 return *this; } + bool RecursionStatus::isRecursive() const { return Bitfield::get<RecursionStatus::IsRecursive>(Storage); // Storage s< 0 } @@ -270,8 +270,9 @@ struct ResultRow { std::string DebugInfo; std::string Function; }; +} // namespace -ResultRow getStats(MutableArrayRef<uint64_t> Timings) { +static ResultRow getStats(MutableArrayRef<uint64_t> Timings) { assert(!Timings.empty()); ResultRow R; R.Sum = std::accumulate(Timings.begin(), Timings.end(), 0.0); @@ -296,8 +297,6 @@ ResultRow getStats(MutableArrayRef<uint64_t> Timings) { return R; } -} // namespace - using TupleType = std::tuple<int32_t, uint64_t, ResultRow>; template <typename F> @@ -417,11 +416,8 @@ void LatencyAccountant::exportStatsAsCSV(raw_ostream &OS, }); } -using namespace llvm::xray; - -namespace llvm { -template <> struct format_provider<llvm::xray::RecordTypes> { - static void format(const llvm::xray::RecordTypes &T, raw_ostream &Stream, +template <> struct llvm::format_provider<RecordTypes> { + static void format(const RecordTypes &T, raw_ostream &Stream, StringRef Style) { switch (T) { case RecordTypes::ENTER: @@ -445,7 +441,6 @@ template <> struct format_provider<llvm::xray::RecordTypes> { } } }; -} // namespace llvm static CommandRegistration Unused(&Account, []() -> Error { InstrumentationMap Map; @@ -468,10 +463,10 @@ static CommandRegistration Unused(&Account, []() -> Error { const auto &FunctionAddresses = Map.getFunctionAddresses(); symbolize::LLVMSymbolizer Symbolizer; - llvm::xray::FuncIdConversionHelper FuncIdHelper(AccountInstrMap, Symbolizer, - FunctionAddresses); - xray::LatencyAccountant FCA(FuncIdHelper, AccountRecursiveCallsOnly, - AccountDeduceSiblingCalls); + FuncIdConversionHelper FuncIdHelper(AccountInstrMap, Symbolizer, + FunctionAddresses); + LatencyAccountant FCA(FuncIdHelper, AccountRecursiveCallsOnly, + AccountDeduceSiblingCalls); auto TraceOrErr = loadTraceFile(AccountInput); if (!TraceOrErr) return joinErrors( diff --git a/llvm/tools/llvm-xray/xray-account.h b/llvm/tools/llvm-xray/xray-account.h index 0f24f93..c4e20731af 100644 --- a/llvm/tools/llvm-xray/xray-account.h +++ b/llvm/tools/llvm-xray/xray-account.h @@ -21,8 +21,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/XRay/XRayRecord.h" -namespace llvm { -namespace xray { +namespace llvm::xray { class LatencyAccountant { public: @@ -107,7 +106,6 @@ private: template <class F> void exportStats(const XRayFileHeader &Header, F fn) const; }; -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif // LLVM_TOOLS_LLVM_XRAY_XRAY_ACCOUNT_H diff --git a/llvm/tools/llvm-xray/xray-color-helper.h b/llvm/tools/llvm-xray/xray-color-helper.h index 3141e90..ae95c92 100644 --- a/llvm/tools/llvm-xray/xray-color-helper.h +++ b/llvm/tools/llvm-xray/xray-color-helper.h @@ -16,8 +16,7 @@ #include "llvm/ADT/ArrayRef.h" #include <tuple> -namespace llvm { -namespace xray { +namespace llvm::xray { /// The color helper class it a healper class which allows you to easily get a /// color in a gradient. This is used to color-code edges in XRay-Graph tools. @@ -82,6 +81,6 @@ public: // Convert a tuple to a string static std::string getColorString(std::tuple<uint8_t, uint8_t, uint8_t> t); }; -} // namespace xray -} // namespace llvm +} // namespace llvm::xray + #endif diff --git a/llvm/tools/llvm-xray/xray-converter.cpp b/llvm/tools/llvm-xray/xray-converter.cpp index 34832eb..0f45fef 100644 --- a/llvm/tools/llvm-xray/xray-converter.cpp +++ b/llvm/tools/llvm-xray/xray-converter.cpp @@ -176,13 +176,14 @@ struct StackIdData { // unique ID. SmallVector<TrieNode<StackIdData> *, 4> siblings; }; +} // namespace using StackTrieNode = TrieNode<StackIdData>; // A helper function to find the sibling nodes for an encountered function in a // thread of execution. Relies on the invariant that each time a new node is // traversed in a thread, sibling bidirectional pointers are maintained. -SmallVector<StackTrieNode *, 4> +static SmallVector<StackTrieNode *, 4> findSiblings(StackTrieNode *parent, int32_t FnId, uint32_t TId, const DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>> &StackRootsByThreadId) { @@ -213,7 +214,7 @@ findSiblings(StackTrieNode *parent, int32_t FnId, uint32_t TId, // StackTrie representing the function call stack. If no node exists, creates // the node. Assigns unique IDs to stacks newly encountered among all threads // and keeps sibling links up to when creating new nodes. -StackTrieNode *findOrCreateStackNode( +static StackTrieNode *findOrCreateStackNode( StackTrieNode *Parent, int32_t FuncId, uint32_t TId, DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>> &StackRootsByThreadId, DenseMap<unsigned, StackTrieNode *> &StacksByStackId, unsigned *id_counter, @@ -244,12 +245,13 @@ StackTrieNode *findOrCreateStackNode( return CurrentStack; } -void writeTraceViewerRecord(uint16_t Version, raw_ostream &OS, int32_t FuncId, - uint32_t TId, uint32_t PId, bool Symbolize, - const FuncIdConversionHelper &FuncIdHelper, - double EventTimestampUs, - const StackTrieNode &StackCursor, - StringRef FunctionPhenotype) { +static void writeTraceViewerRecord(uint16_t Version, raw_ostream &OS, + int32_t FuncId, uint32_t TId, uint32_t PId, + bool Symbolize, + const FuncIdConversionHelper &FuncIdHelper, + double EventTimestampUs, + const StackTrieNode &StackCursor, + StringRef FunctionPhenotype) { OS << " "; if (Version >= 3) { OS << llvm::formatv( @@ -269,8 +271,6 @@ void writeTraceViewerRecord(uint16_t Version, raw_ostream &OS, int32_t FuncId, } } -} // namespace - void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records, raw_ostream &OS) { const auto &FH = Records.getFileHeader(); @@ -364,9 +364,6 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records, OS << "}\n"; // Close the JSON entry. } -namespace llvm { -namespace xray { - static CommandRegistration Unused(&Convert, []() -> Error { // FIXME: Support conversion to BINARY when upgrading XRay trace versions. InstrumentationMap Map; @@ -386,9 +383,9 @@ static CommandRegistration Unused(&Convert, []() -> Error { if (Demangle.getPosition() < NoDemangle.getPosition()) SymbolizerOpts.Demangle = false; symbolize::LLVMSymbolizer Symbolizer(SymbolizerOpts); - llvm::xray::FuncIdConversionHelper FuncIdHelper(ConvertInstrMap, Symbolizer, - FunctionAddresses); - llvm::xray::TraceConverter TC(FuncIdHelper, ConvertSymbolize); + FuncIdConversionHelper FuncIdHelper(ConvertInstrMap, Symbolizer, + FunctionAddresses); + TraceConverter TC(FuncIdHelper, ConvertSymbolize); std::error_code EC; raw_fd_ostream OS(ConvertOutput, EC, ConvertOutputFormat == ConvertFormats::BINARY @@ -420,6 +417,3 @@ static CommandRegistration Unused(&Convert, []() -> Error { } return Error::success(); }); - -} // namespace xray -} // namespace llvm diff --git a/llvm/tools/llvm-xray/xray-converter.h b/llvm/tools/llvm-xray/xray-converter.h index db6d2b1..0f8efa4 100644 --- a/llvm/tools/llvm-xray/xray-converter.h +++ b/llvm/tools/llvm-xray/xray-converter.h @@ -17,8 +17,7 @@ #include "llvm/XRay/Trace.h" #include "llvm/XRay/XRayRecord.h" -namespace llvm { -namespace xray { +namespace llvm::xray { class TraceConverter { FuncIdConversionHelper &FuncIdHelper; @@ -37,7 +36,6 @@ public: void exportAsChromeTraceEventFormat(const Trace &Records, raw_ostream &OS); }; -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif // LLVM_TOOLS_LLVM_XRAY_XRAY_CONVERTER_H diff --git a/llvm/tools/llvm-xray/xray-extract.cpp b/llvm/tools/llvm-xray/xray-extract.cpp index 52767a0..70fe011 100644 --- a/llvm/tools/llvm-xray/xray-extract.cpp +++ b/llvm/tools/llvm-xray/xray-extract.cpp @@ -52,10 +52,8 @@ static cl::opt<bool> NoDemangle("no-demangle", cl::desc("don't demangle symbols"), cl::sub(Extract)); -namespace { - -void exportAsYAML(const InstrumentationMap &Map, raw_ostream &OS, - FuncIdConversionHelper &FH) { +static void exportAsYAML(const InstrumentationMap &Map, raw_ostream &OS, + FuncIdConversionHelper &FH) { // First we translate the sleds into the YAMLXRaySledEntry objects in a deque. std::vector<YAMLXRaySledEntry> YAMLSleds; auto Sleds = Map.sleds(); @@ -72,8 +70,6 @@ void exportAsYAML(const InstrumentationMap &Map, raw_ostream &OS, Out << YAMLSleds; } -} // namespace - static CommandRegistration Unused(&Extract, []() -> Error { auto InstrumentationMapOrError = loadInstrumentationMap(ExtractInput); if (!InstrumentationMapOrError) @@ -94,8 +90,8 @@ static CommandRegistration Unused(&Extract, []() -> Error { if (Demangle.getPosition() < NoDemangle.getPosition()) opts.Demangle = false; symbolize::LLVMSymbolizer Symbolizer(opts); - llvm::xray::FuncIdConversionHelper FuncIdHelper(ExtractInput, Symbolizer, - FunctionAddresses); + FuncIdConversionHelper FuncIdHelper(ExtractInput, Symbolizer, + FunctionAddresses); exportAsYAML(*InstrumentationMapOrError, OS, FuncIdHelper); return Error::success(); }); diff --git a/llvm/tools/llvm-xray/xray-graph-diff.cpp b/llvm/tools/llvm-xray/xray-graph-diff.cpp index b5c63ab..66799091 100644 --- a/llvm/tools/llvm-xray/xray-graph-diff.cpp +++ b/llvm/tools/llvm-xray/xray-graph-diff.cpp @@ -236,6 +236,7 @@ Expected<GraphDiffRenderer> GraphDiffRenderer::Factory::getGraphDiffRenderer() { return R; } + // Returns the Relative change With respect to LeftStat between LeftStat // and RightStat. static double statRelDiff(const GraphDiffRenderer::TimeStat &LeftStat, @@ -363,9 +364,8 @@ void GraphDiffRenderer::exportGraphAsDOT(raw_ostream &OS, StatType EdgeLabel, StringMap<int32_t> VertexNo; int i = 0; - for (const auto &V : G.vertices()) { + for (const auto &V : G.vertices()) VertexNo[V.first] = i++; - } ColorHelper H(ColorHelper::DivergingScheme::PiYG); diff --git a/llvm/tools/llvm-xray/xray-graph-diff.h b/llvm/tools/llvm-xray/xray-graph-diff.h index c2b2a93..709f1bf 100644 --- a/llvm/tools/llvm-xray/xray-graph-diff.h +++ b/llvm/tools/llvm-xray/xray-graph-diff.h @@ -17,8 +17,7 @@ #include "xray-graph.h" #include "llvm/XRay/Graph.h" -namespace llvm { -namespace xray { +namespace llvm::xray { // This class creates a graph representing the difference between two // xray-graphs And allows you to print it to a dot file, with optional color @@ -66,7 +65,6 @@ public: const GraphT &getGraph() { return G; } }; -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif diff --git a/llvm/tools/llvm-xray/xray-graph.cpp b/llvm/tools/llvm-xray/xray-graph.cpp index a97aacf..71d3029 100644 --- a/llvm/tools/llvm-xray/xray-graph.cpp +++ b/llvm/tools/llvm-xray/xray-graph.cpp @@ -153,7 +153,9 @@ static cl::opt<GraphRenderer::StatType> GraphVertexColorType( static cl::alias GraphVertexColorType2("b", cl::aliasopt(GraphVertexColorType), cl::desc("Alias for -edge-label")); -template <class T> T diff(T L, T R) { return std::max(L, R) - std::min(L, R); } +template <class T> static T diff(T L, T R) { + return std::max(L, R) - std::min(L, R); +} // Updates the statistics for a GraphRenderer::TimeStat static void updateStat(GraphRenderer::TimeStat &S, int64_t L) { @@ -459,10 +461,9 @@ Expected<GraphRenderer> GraphRenderer::Factory::getGraphRenderer() { symbolize::LLVMSymbolizer Symbolizer; const auto &Header = Trace.getFileHeader(); - llvm::xray::FuncIdConversionHelper FuncIdHelper(InstrMap, Symbolizer, - FunctionAddresses); + FuncIdConversionHelper FuncIdHelper(InstrMap, Symbolizer, FunctionAddresses); - xray::GraphRenderer GR(FuncIdHelper, DeduceSiblingCalls); + GraphRenderer GR(FuncIdHelper, DeduceSiblingCalls); for (const auto &Record : Trace) { auto E = GR.accountRecord(Record); if (!E) diff --git a/llvm/tools/llvm-xray/xray-graph.h b/llvm/tools/llvm-xray/xray-graph.h index 23372d4..fd96449 100644 --- a/llvm/tools/llvm-xray/xray-graph.h +++ b/llvm/tools/llvm-xray/xray-graph.h @@ -28,8 +28,7 @@ #include "llvm/XRay/Trace.h" #include "llvm/XRay/XRayRecord.h" -namespace llvm { -namespace xray { +namespace llvm::xray { /// A class encapsulating the logic related to analyzing XRay traces, producting /// Graphs from them and then exporting those graphs for review. @@ -225,7 +224,6 @@ inline GraphRenderer::TimeStat operator/(const GraphRenderer::TimeStat &A, A.Pct90 / B.Pct90, A.Pct99 / B.Pct99, A.Max / B.Max, A.Sum / B.Sum}; } -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif // XRAY_GRAPH_H diff --git a/llvm/tools/llvm-xray/xray-registry.cpp b/llvm/tools/llvm-xray/xray-registry.cpp index 34ac07e..ae18f8d 100644 --- a/llvm/tools/llvm-xray/xray-registry.cpp +++ b/llvm/tools/llvm-xray/xray-registry.cpp @@ -13,8 +13,8 @@ #include <unordered_map> -namespace llvm { -namespace xray { +using namespace llvm; +using namespace xray; using HandlerType = std::function<Error()>; @@ -31,12 +31,9 @@ CommandRegistration::CommandRegistration(cl::SubCommand *SC, getCommands()[SC] = Command; } -HandlerType dispatch(cl::SubCommand *SC) { +HandlerType xray::dispatch(cl::SubCommand *SC) { auto It = getCommands().find(SC); assert(It != getCommands().end() && "Attempting to dispatch on un-registered SubCommand."); return It->second; } - -} // namespace xray -} // namespace llvm diff --git a/llvm/tools/llvm-xray/xray-registry.h b/llvm/tools/llvm-xray/xray-registry.h index d6fae78..3921a42 100644 --- a/llvm/tools/llvm-xray/xray-registry.h +++ b/llvm/tools/llvm-xray/xray-registry.h @@ -15,8 +15,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" -namespace llvm { -namespace xray { +namespace llvm::xray { // Use |CommandRegistration| as a global initialiser that registers a function // and associates it with |SC|. This requires that a command has not been @@ -34,7 +33,6 @@ struct CommandRegistration { // Requires that |SC| is not null and has an associated function to it. std::function<Error()> dispatch(cl::SubCommand *SC); -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif // TOOLS_LLVM_XRAY_XRAY_REGISTRY_H diff --git a/llvm/tools/llvm-xray/xray-stacks.cpp b/llvm/tools/llvm-xray/xray-stacks.cpp index b11d732..8101cad 100644 --- a/llvm/tools/llvm-xray/xray-stacks.cpp +++ b/llvm/tools/llvm-xray/xray-stacks.cpp @@ -107,6 +107,8 @@ static cl::opt<AggregationType> RequestedAggregation( "of all callees.")), cl::sub(Stack), cl::init(AggregationType::TOTAL_TIME)); +namespace { + /// A helper struct to work with formatv and XRayRecords. Makes it easier to /// use instrumentation map names or addresses in formatted output. struct format_xray_record : public FormatAdapter<XRayRecord> { @@ -252,10 +254,9 @@ private: /// maintain an index of unique functions, and provide a means of iterating /// through all the instrumented call stacks which we know about. -namespace { struct StackDuration { - llvm::SmallVector<int64_t, 4> TerminalDurations; - llvm::SmallVector<int64_t, 4> IntermediateDurations; + SmallVector<int64_t, 4> TerminalDurations; + SmallVector<int64_t, 4> IntermediateDurations; }; } // namespace @@ -310,6 +311,7 @@ std::size_t GetValueForStack(const StackTrieNode *Node) { return 0; } +namespace { class StackTrie { // Avoid the magic number of 4 propagated through the code with an alias. // We use this SmallVector to track the root nodes in a call graph. @@ -649,6 +651,7 @@ public: OS << "\n"; } }; +} // namespace static std::string CreateErrorMessage(StackTrie::AccountRecordStatus Error, const XRayRecord &Record, diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp index c19fc19..a383415 100644 --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -45,32 +45,30 @@ using namespace llvm; using namespace opt_tool; -namespace llvm { -cl::opt<bool> DebugifyEach( +cl::opt<bool> llvm::DebugifyEach( "debugify-each", cl::desc("Start each pass with debugify and end it with check-debugify")); -cl::opt<std::string> - DebugifyExport("debugify-export", - cl::desc("Export per-pass debugify statistics to this file"), - cl::value_desc("filename")); +cl::opt<std::string> llvm::DebugifyExport( + "debugify-export", + cl::desc("Export per-pass debugify statistics to this file"), + cl::value_desc("filename")); -cl::opt<bool> VerifyEachDebugInfoPreserve( +cl::opt<bool> llvm::VerifyEachDebugInfoPreserve( "verify-each-debuginfo-preserve", cl::desc("Start each pass with collecting and end it with checking of " "debug info preservation.")); +cl::opt<std::string> llvm::VerifyDIPreserveExport( + "verify-di-preserve-export", + cl::desc("Export debug info preservation failures into " + "specified (JSON) file (should be abs path as we use" + " append mode to insert new JSON objects)"), + cl::value_desc("filename"), cl::init("")); + static cl::opt<bool> EnableLoopFusion("enable-loopfusion", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFuse Pass")); -cl::opt<std::string> - VerifyDIPreserveExport("verify-di-preserve-export", - cl::desc("Export debug info preservation failures into " - "specified (JSON) file (should be abs path as we use" - " append mode to insert new JSON objects)"), - cl::value_desc("filename"), cl::init("")); - -} // namespace llvm enum class DebugLogging { None, Normal, Verbose, Quiet }; @@ -356,7 +354,7 @@ bool llvm::runPassPipeline( ToolOutputFile *Out, ToolOutputFile *ThinLTOLinkOut, ToolOutputFile *OptRemarkFile, StringRef PassPipeline, ArrayRef<PassPlugin> PassPlugins, - ArrayRef<std::function<void(llvm::PassBuilder &)>> PassBuilderCallbacks, + ArrayRef<std::function<void(PassBuilder &)>> PassBuilderCallbacks, OutputKind OK, VerifierKind VK, bool ShouldPreserveAssemblyUseListOrder, bool ShouldPreserveBitcodeUseListOrder, bool EmitSummaryIndex, bool EmitModuleHash, bool EnableDebugify, bool VerifyDIPreserve, diff --git a/llvm/tools/opt/NewPMDriver.h b/llvm/tools/opt/NewPMDriver.h index 6c21d6c..042d5d4 100644 --- a/llvm/tools/opt/NewPMDriver.h +++ b/llvm/tools/opt/NewPMDriver.h @@ -52,7 +52,7 @@ enum PGOKind { SampleUse }; enum CSPGOKind { NoCSPGO, CSInstrGen, CSInstrUse }; -} +} // namespace opt_tool void printPasses(raw_ostream &OS); @@ -70,7 +70,7 @@ bool runPassPipeline( ToolOutputFile *Out, ToolOutputFile *ThinLinkOut, ToolOutputFile *OptRemarkFile, StringRef PassPipeline, ArrayRef<PassPlugin> PassPlugins, - ArrayRef<std::function<void(llvm::PassBuilder &)>> PassBuilderCallbacks, + ArrayRef<std::function<void(PassBuilder &)>> PassBuilderCallbacks, opt_tool::OutputKind OK, opt_tool::VerifierKind VK, bool ShouldPreserveAssemblyUseListOrder, bool ShouldPreserveBitcodeUseListOrder, bool EmitSummaryIndex, diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index fc8bd665..ad0f8f4a 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -14,12 +14,14 @@ #include "llvm/ADT/ArrayRef.h" #include <functional> +using namespace llvm; + namespace llvm { class PassBuilder; } -extern "C" int optMain(int argc, char **argv, - llvm::ArrayRef<std::function<void(llvm::PassBuilder &)>> - PassBuilderCallbacks); +extern "C" int +optMain(int argc, char **argv, + ArrayRef<std::function<void(PassBuilder &)>> PassBuilderCallbacks); int main(int argc, char **argv) { return optMain(argc, argv, {}); } diff --git a/llvm/tools/opt/optdriver.cpp b/llvm/tools/opt/optdriver.cpp index 2ac8de7..f70db31 100644 --- a/llvm/tools/opt/optdriver.cpp +++ b/llvm/tools/opt/optdriver.cpp @@ -296,23 +296,25 @@ static CodeGenOptLevel GetCodeGenOptLevel() { return static_cast<CodeGenOptLevel>(unsigned(CodeGenOptLevelCL)); } +namespace { struct TimeTracerRAII { TimeTracerRAII(StringRef ProgramName) { if (TimeTrace) timeTraceProfilerInitialize(TimeTraceGranularity, ProgramName); } ~TimeTracerRAII() { - if (TimeTrace) { - if (auto E = timeTraceProfilerWrite(TimeTraceFile, OutputFilename)) { - handleAllErrors(std::move(E), [&](const StringError &SE) { - errs() << SE.getMessage() << "\n"; - }); - return; - } - timeTraceProfilerCleanup(); + if (!TimeTrace) + return; + if (auto E = timeTraceProfilerWrite(TimeTraceFile, OutputFilename)) { + handleAllErrors(std::move(E), [&](const StringError &SE) { + errs() << SE.getMessage() << "\n"; + }); + return; } + timeTraceProfilerCleanup(); } }; +} // namespace // For use in NPM transition. Currently this contains most codegen-specific // passes. Remove passes from here when porting to the NPM. @@ -377,10 +379,10 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) { "callbrprepare", "scalarizer", }; - for (const auto &P : PassNamePrefix) + for (StringLiteral P : PassNamePrefix) if (Pass.starts_with(P)) return true; - for (const auto &P : PassNameContain) + for (StringLiteral P : PassNameContain) if (Pass.contains(P)) return true; return llvm::is_contained(PassNameExact, Pass); @@ -388,7 +390,7 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) { // For use in NPM transition. static bool shouldForceLegacyPM() { - for (const auto &P : PassList) { + for (const PassInfo *P : PassList) { StringRef Arg = P->getPassArgument(); if (shouldPinPassToLegacyPM(Arg)) return true; @@ -399,9 +401,9 @@ static bool shouldForceLegacyPM() { //===----------------------------------------------------------------------===// // main for opt // -extern "C" int optMain( - int argc, char **argv, - ArrayRef<std::function<void(llvm::PassBuilder &)>> PassBuilderCallbacks) { +extern "C" int +optMain(int argc, char **argv, + ArrayRef<std::function<void(PassBuilder &)>> PassBuilderCallbacks) { InitLLVM X(argc, argv); // Enable debug stream buffering. @@ -673,7 +675,7 @@ extern "C" int optMain( else { // Disable individual builtin functions in TargetLibraryInfo. LibFunc F; - for (auto &FuncName : DisableBuiltins) { + for (const std::string &FuncName : DisableBuiltins) { if (TLII.getLibFunc(FuncName, F)) TLII.setUnavailable(F); else { @@ -683,7 +685,7 @@ extern "C" int optMain( } } - for (auto &FuncName : EnableBuiltins) { + for (const std::string &FuncName : EnableBuiltins) { if (TLII.getLibFunc(FuncName, F)) TLII.setAvailable(F); else { @@ -814,9 +816,8 @@ extern "C" int optMain( Passes.add(TPC); } - // Create a new optimization pass for each one specified on the command line - for (unsigned i = 0; i < PassList.size(); ++i) { - const PassInfo *PassInf = PassList[i]; + // Create a new optimization pass for each one specified on the command line. + for (const PassInfo *PassInf : PassList) { if (PassInf->getNormalCtor()) { Pass *P = PassInf->getNormalCtor()(); if (P) { @@ -826,9 +827,10 @@ extern "C" int optMain( if (VerifyEach) Passes.add(createVerifierPass()); } - } else + } else { errs() << argv[0] << ": cannot create pass: " << PassInf->getPassName() << "\n"; + } } // Check that the module is well formed on completion of optimization diff --git a/llvm/unittests/ADT/BitTest.cpp b/llvm/unittests/ADT/BitTest.cpp index 5b3df91..e8041bb 100644 --- a/llvm/unittests/ADT/BitTest.cpp +++ b/llvm/unittests/ADT/BitTest.cpp @@ -286,6 +286,28 @@ TEST(BitTest, BitCeilConstexpr) { static_assert(llvm::bit_ceil_constexpr(257u) == 512); } +TEST(BitTest, CountrZeroConstexpr) { + static_assert(llvm::countr_zero_constexpr(0u) == 32); + static_assert(llvm::countr_zero_constexpr(1u) == 0); + static_assert(llvm::countr_zero_constexpr(2u) == 1); + static_assert(llvm::countr_zero_constexpr(3u) == 0); + static_assert(llvm::countr_zero_constexpr(4u) == 2); + static_assert(llvm::countr_zero_constexpr(8u) == 3); + static_assert(llvm::countr_zero_constexpr(0x80000000u) == 31); + + static_assert(llvm::countr_zero_constexpr(0ull) == 64); + static_assert(llvm::countr_zero_constexpr(1ull) == 0); + static_assert(llvm::countr_zero_constexpr(0x100000000ull) == 32); + static_assert(llvm::countr_zero_constexpr(0x8000000000000000ull) == 63); + + static_assert( + llvm::countr_zero_constexpr(std::numeric_limits<uint16_t>::max()) == 0); + static_assert( + llvm::countr_zero_constexpr(std::numeric_limits<uint32_t>::max()) == 0); + static_assert( + llvm::countr_zero_constexpr(std::numeric_limits<uint64_t>::max()) == 0); +} + TEST(BitTest, CountlZero) { uint8_t Z8 = 0; uint16_t Z16 = 0; diff --git a/llvm/unittests/ADT/StringSwitchTest.cpp b/llvm/unittests/ADT/StringSwitchTest.cpp index 0fbf371..d88a0ff 100644 --- a/llvm/unittests/ADT/StringSwitchTest.cpp +++ b/llvm/unittests/ADT/StringSwitchTest.cpp @@ -159,7 +159,7 @@ TEST(StringSwitchTest, Cases) { return llvm::StringSwitch<OSType>(S) .Cases(StringLiteral::withInnerNUL("wind\0ws"), "win32", "winnt", OSType::Windows) - .Cases("linux", "unix", "*nix", "posix", OSType::Linux) + .Cases({"linux", "unix", "*nix", "posix"}, OSType::Linux) .Cases({"macos", "osx"}, OSType::MacOS) .Default(OSType::Unknown); }; @@ -191,7 +191,7 @@ TEST(StringSwitchTest, CasesLower) { return llvm::StringSwitch<OSType>(S) .CasesLower(StringLiteral::withInnerNUL("wind\0ws"), "win32", "winnt", OSType::Windows) - .CasesLower("linux", "unix", "*nix", "posix", OSType::Linux) + .CasesLower({"linux", "unix", "*nix", "posix"}, OSType::Linux) .CasesLower({"macos", "osx"}, OSType::MacOS) .Default(OSType::Unknown); }; @@ -230,13 +230,13 @@ TEST(StringSwitchTest, CasesCopies) { // Check that evaluating multiple cases does not cause unnecessary copies. unsigned NumCopies = 0; - llvm::StringSwitch<Copyable, void>("baz").Cases("foo", "bar", "baz", "qux", + llvm::StringSwitch<Copyable, void>("baz").Cases({"foo", "bar", "baz", "qux"}, Copyable{NumCopies}); EXPECT_EQ(NumCopies, 1u); NumCopies = 0; llvm::StringSwitch<Copyable, void>("baz").CasesLower( - "Foo", "Bar", "Baz", "Qux", Copyable{NumCopies}); + {"Foo", "Bar", "Baz", "Qux"}, Copyable{NumCopies}); EXPECT_EQ(NumCopies, 1u); } diff --git a/llvm/unittests/CAS/CASTestConfig.cpp b/llvm/unittests/CAS/CASTestConfig.cpp index 29e2db4..91d0970 100644 --- a/llvm/unittests/CAS/CASTestConfig.cpp +++ b/llvm/unittests/CAS/CASTestConfig.cpp @@ -19,3 +19,17 @@ static CASTestingEnv createInMemory(int I) { INSTANTIATE_TEST_SUITE_P(InMemoryCAS, CASTest, ::testing::Values(createInMemory)); + +#if LLVM_ENABLE_ONDISK_CAS +namespace llvm::cas::ondisk { +extern void setMaxMappingSize(uint64_t Size); +} // namespace llvm::cas::ondisk + +void setMaxOnDiskCASMappingSize() { + static std::once_flag Flag; + std::call_once( + Flag, [] { llvm::cas::ondisk::setMaxMappingSize(100 * 1024 * 1024); }); +} +#else +void setMaxOnDiskCASMappingSize() {} +#endif /* LLVM_ENABLE_ONDISK_CAS */ diff --git a/llvm/unittests/CAS/CASTestConfig.h b/llvm/unittests/CAS/CASTestConfig.h index 8093a0b..c08968b 100644 --- a/llvm/unittests/CAS/CASTestConfig.h +++ b/llvm/unittests/CAS/CASTestConfig.h @@ -18,6 +18,17 @@ struct CASTestingEnv { std::unique_ptr<llvm::cas::ActionCache> Cache; }; +void setMaxOnDiskCASMappingSize(); + +// Test fixture for on-disk data base tests. +class OnDiskCASTest : public ::testing::Test { +protected: + void SetUp() override { + // Use a smaller database size for testing to conserve disk space. + setMaxOnDiskCASMappingSize(); + } +}; + class CASTest : public testing::TestWithParam<std::function<CASTestingEnv(int)>> { protected: diff --git a/llvm/unittests/CAS/CMakeLists.txt b/llvm/unittests/CAS/CMakeLists.txt index ee40e6c..da469f7 100644 --- a/llvm/unittests/CAS/CMakeLists.txt +++ b/llvm/unittests/CAS/CMakeLists.txt @@ -1,3 +1,19 @@ +set(ONDISK_CAS_TEST_SOURCES + OnDiskGraphDBTest.cpp + OnDiskDataAllocatorTest.cpp + OnDiskKeyValueDBTest.cpp + OnDiskTrieRawHashMapTest.cpp + ProgramTest.cpp + ) + +set(LLVM_OPTIONAL_SOURCES + ${ONDISK_CAS_TEST_SOURCES} + ) + +if (NOT LLVM_ENABLE_ONDISK_CAS) + unset(ONDISK_CAS_TEST_SOURCES) +endif() + set(LLVM_LINK_COMPONENTS Support CAS @@ -8,9 +24,8 @@ add_llvm_unittest(CASTests ActionCacheTest.cpp CASTestConfig.cpp ObjectStoreTest.cpp - OnDiskDataAllocatorTest.cpp - OnDiskTrieRawHashMapTest.cpp - ProgramTest.cpp + + ${ONDISK_CAS_TEST_SOURCES} ) target_link_libraries(CASTests PRIVATE LLVMTestingSupport) diff --git a/llvm/unittests/CAS/OnDiskCommonUtils.h b/llvm/unittests/CAS/OnDiskCommonUtils.h new file mode 100644 index 0000000..57c8c22 --- /dev/null +++ b/llvm/unittests/CAS/OnDiskCommonUtils.h @@ -0,0 +1,76 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file Helper functions to test OnDiskCASDatabases. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/BuiltinObjectHasher.h" +#include "llvm/CAS/OnDiskGraphDB.h" +#include "llvm/Support/BLAKE3.h" +#include "llvm/Testing/Support/Error.h" + +namespace llvm::unittest::cas { + +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +using HasherT = BLAKE3; +using HashType = decltype(HasherT::hash(std::declval<ArrayRef<uint8_t> &>())); +using ValueType = std::array<char, 20>; + +inline HashType digest(StringRef Data, ArrayRef<ArrayRef<uint8_t>> RefHashes) { + return BuiltinObjectHasher<HasherT>::hashObject( + RefHashes, arrayRefFromStringRef<char>(Data)); +} + +inline ObjectID digest(OnDiskGraphDB &DB, StringRef Data, + ArrayRef<ObjectID> Refs) { + SmallVector<ArrayRef<uint8_t>, 8> RefHashes; + for (ObjectID Ref : Refs) + RefHashes.push_back(DB.getDigest(Ref)); + HashType Digest = digest(Data, RefHashes); + std::optional<ObjectID> ID; + EXPECT_THAT_ERROR(DB.getReference(Digest).moveInto(ID), Succeeded()); + return *ID; +} + +inline HashType digest(StringRef Data) { + return HasherT::hash(arrayRefFromStringRef(Data)); +} + +inline ValueType valueFromString(StringRef S) { + ValueType Val; + llvm::copy(S.substr(0, sizeof(Val)), Val.data()); + return Val; +} + +inline Expected<ObjectID> store(OnDiskGraphDB &DB, StringRef Data, + ArrayRef<ObjectID> Refs) { + ObjectID ID = digest(DB, Data, Refs); + if (Error E = DB.store(ID, Refs, arrayRefFromStringRef<char>(Data))) + return std::move(E); + return ID; +} + +inline Error printTree(OnDiskGraphDB &DB, ObjectID ID, raw_ostream &OS, + unsigned Indent = 0) { + std::optional<ondisk::ObjectHandle> Obj; + if (Error E = DB.load(ID).moveInto(Obj)) + return E; + if (!Obj) + return Error::success(); + OS.indent(Indent) << toStringRef(DB.getObjectData(*Obj)) << '\n'; + for (ObjectID Ref : DB.getObjectRefs(*Obj)) { + if (Error E = printTree(DB, Ref, OS, Indent + 2)) + return E; + } + return Error::success(); +} + +} // namespace llvm::unittest::cas diff --git a/llvm/unittests/CAS/OnDiskGraphDBTest.cpp b/llvm/unittests/CAS/OnDiskGraphDBTest.cpp new file mode 100644 index 0000000..58f5dcc6 --- /dev/null +++ b/llvm/unittests/CAS/OnDiskGraphDBTest.cpp @@ -0,0 +1,310 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CASTestConfig.h" +#include "OnDiskCommonUtils.h" +#include "llvm/Testing/Support/Error.h" +#include "llvm/Testing/Support/SupportHelpers.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; +using namespace llvm::unittest::cas; + +TEST_F(OnDiskCASTest, OnDiskGraphDBTest) { + unittest::TempDir Temp("ondiskcas", /*Unique=*/true); + std::unique_ptr<OnDiskGraphDB> DB; + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType)).moveInto(DB), + Succeeded()); + + auto digest = [&DB](StringRef Data, ArrayRef<ObjectID> Refs) -> ObjectID { + return ::digest(*DB, Data, Refs); + }; + + auto store = [&](StringRef Data, + ArrayRef<ObjectID> Refs) -> Expected<ObjectID> { + return ::store(*DB, Data, Refs); + }; + + std::optional<ObjectID> ID1; + ASSERT_THAT_ERROR(store("hello", {}).moveInto(ID1), Succeeded()); + + std::optional<ondisk::ObjectHandle> Obj1; + ASSERT_THAT_ERROR(DB->load(*ID1).moveInto(Obj1), Succeeded()); + ASSERT_TRUE(Obj1.has_value()); + EXPECT_EQ(toStringRef(DB->getObjectData(*Obj1)), "hello"); + + ArrayRef<uint8_t> Digest1 = DB->getDigest(*ID1); + std::optional<ObjectID> ID2; + ASSERT_THAT_ERROR(DB->getReference(Digest1).moveInto(ID2), Succeeded()); + EXPECT_EQ(ID1, ID2); + + ObjectID ID3 = digest("world", {}); + EXPECT_FALSE(DB->containsObject(ID3)); + std::optional<ondisk::ObjectHandle> Obj2; + ASSERT_THAT_ERROR(DB->load(ID3).moveInto(Obj2), Succeeded()); + EXPECT_FALSE(Obj2.has_value()); + + ASSERT_THAT_ERROR(DB->store(ID3, {}, arrayRefFromStringRef<char>("world")), + Succeeded()); + EXPECT_TRUE(DB->containsObject(ID3)); + ASSERT_THAT_ERROR(DB->load(ID3).moveInto(Obj2), Succeeded()); + ASSERT_TRUE(Obj2.has_value()); + EXPECT_EQ(toStringRef(DB->getObjectData(*Obj2)), "world"); + + size_t LargeDataSize = 256LL * 1024LL; // 256K. + // The precise size number is not important, we mainly check that the large + // object will be properly accounted for. + EXPECT_TRUE(DB->getStorageSize() > 10 && + DB->getStorageSize() < LargeDataSize); + + SmallString<16> Buffer; + Buffer.resize(LargeDataSize); + ASSERT_THAT_ERROR(store(Buffer, {}).moveInto(ID1), Succeeded()); + size_t StorageSize = DB->getStorageSize(); + EXPECT_TRUE(StorageSize > LargeDataSize); + + // Close & re-open the DB and check that it reports the same storage size. + DB.reset(); + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType)).moveInto(DB), + Succeeded()); + EXPECT_EQ(DB->getStorageSize(), StorageSize); +} + +TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInSingleNode) { + unittest::TempDir TempUpstream("ondiskcas-upstream", /*Unique=*/true); + std::unique_ptr<OnDiskGraphDB> UpstreamDB; + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(TempUpstream.path(), "blake3", sizeof(HashType)) + .moveInto(UpstreamDB), + Succeeded()); + { + std::optional<ObjectID> ID1; + ASSERT_THAT_ERROR(store(*UpstreamDB, "hello", {}).moveInto(ID1), + Succeeded()); + std::optional<ObjectID> ID2; + ASSERT_THAT_ERROR(store(*UpstreamDB, "another", {}).moveInto(ID2), + Succeeded()); + std::optional<ObjectID> ID3; + ASSERT_THAT_ERROR(store(*UpstreamDB, "world", {*ID1, *ID2}).moveInto(ID3), + Succeeded()); + } + + unittest::TempDir Temp("ondiskcas", /*Unique=*/true); + std::unique_ptr<OnDiskGraphDB> DB; + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType), + std::move(UpstreamDB), + OnDiskGraphDB::FaultInPolicy::SingleNode) + .moveInto(DB), + Succeeded()); + + ObjectID ID1 = digest(*DB, "hello", {}); + ObjectID ID2 = digest(*DB, "another", {}); + ObjectID ID3 = digest(*DB, "world", {ID1, ID2}); + ObjectID ID4 = digest(*DB, "world", {}); + + EXPECT_TRUE(DB->containsObject(ID1)); + EXPECT_TRUE(DB->containsObject(ID2)); + EXPECT_TRUE(DB->containsObject(ID3)); + EXPECT_FALSE(DB->containsObject(ID4)); + + EXPECT_TRUE(DB->getExistingReference(digest("hello", {})).has_value()); + EXPECT_TRUE(DB->getExistingReference(DB->getDigest(ID3)).has_value()); + EXPECT_FALSE(DB->getExistingReference(digest("world", {})).has_value()); + + { + std::optional<ondisk::ObjectHandle> Obj; + ASSERT_THAT_ERROR(DB->load(ID1).moveInto(Obj), Succeeded()); + ASSERT_TRUE(Obj.has_value()); + EXPECT_EQ(toStringRef(DB->getObjectData(*Obj)), "hello"); + auto Refs = DB->getObjectRefs(*Obj); + EXPECT_TRUE(Refs.empty()); + } + { + std::optional<ondisk::ObjectHandle> Obj; + ASSERT_THAT_ERROR(DB->load(ID3).moveInto(Obj), Succeeded()); + ASSERT_TRUE(Obj.has_value()); + EXPECT_EQ(toStringRef(DB->getObjectData(*Obj)), "world"); + auto Refs = DB->getObjectRefs(*Obj); + ASSERT_EQ(std::distance(Refs.begin(), Refs.end()), 2); + EXPECT_EQ(Refs.begin()[0], ID1); + EXPECT_EQ(Refs.begin()[1], ID2); + } + { + std::optional<ondisk::ObjectHandle> Obj; + ASSERT_THAT_ERROR(DB->load(ID4).moveInto(Obj), Succeeded()); + EXPECT_FALSE(Obj.has_value()); + } + + // Re-open the primary without chaining, to verify the data were copied from + // the upstream. + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType), + /*UpstreamDB=*/nullptr, + OnDiskGraphDB::FaultInPolicy::SingleNode) + .moveInto(DB), + Succeeded()); + ID1 = digest(*DB, "hello", {}); + ID2 = digest(*DB, "another", {}); + ID3 = digest(*DB, "world", {ID1, ID2}); + EXPECT_TRUE(DB->containsObject(ID1)); + EXPECT_FALSE(DB->containsObject(ID2)); + EXPECT_TRUE(DB->containsObject(ID3)); + { + std::optional<ondisk::ObjectHandle> Obj; + ASSERT_THAT_ERROR(DB->load(ID1).moveInto(Obj), Succeeded()); + ASSERT_TRUE(Obj.has_value()); + EXPECT_EQ(toStringRef(DB->getObjectData(*Obj)), "hello"); + auto Refs = DB->getObjectRefs(*Obj); + EXPECT_TRUE(Refs.empty()); + } +} + +TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInFullTree) { + unittest::TempDir TempUpstream("ondiskcas-upstream", /*Unique=*/true); + std::unique_ptr<OnDiskGraphDB> UpstreamDB; + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(TempUpstream.path(), "blake3", sizeof(HashType)) + .moveInto(UpstreamDB), + Succeeded()); + HashType RootHash; + { + std::optional<ObjectID> ID11; + ASSERT_THAT_ERROR(store(*UpstreamDB, "11", {}).moveInto(ID11), Succeeded()); + std::optional<ObjectID> ID121; + ASSERT_THAT_ERROR(store(*UpstreamDB, "121", {}).moveInto(ID121), + Succeeded()); + std::optional<ObjectID> ID12; + ASSERT_THAT_ERROR(store(*UpstreamDB, "12", {*ID121}).moveInto(ID12), + Succeeded()); + std::optional<ObjectID> ID1; + ASSERT_THAT_ERROR(store(*UpstreamDB, "1", {*ID11, *ID12}).moveInto(ID1), + Succeeded()); + std::optional<ObjectID> ID21; + ASSERT_THAT_ERROR(store(*UpstreamDB, "21", {}).moveInto(ID21), Succeeded()); + std::optional<ObjectID> ID22; + ASSERT_THAT_ERROR(store(*UpstreamDB, "22", {}).moveInto(ID22), Succeeded()); + std::optional<ObjectID> ID2; + ASSERT_THAT_ERROR( + store(*UpstreamDB, "2", {*ID12, *ID21, *ID22}).moveInto(ID2), + Succeeded()); + std::optional<ObjectID> IDRoot; + ASSERT_THAT_ERROR(store(*UpstreamDB, "root", {*ID1, *ID2}).moveInto(IDRoot), + Succeeded()); + ArrayRef<uint8_t> Digest = UpstreamDB->getDigest(*IDRoot); + ASSERT_EQ(Digest.size(), RootHash.size()); + llvm::copy(Digest, RootHash.data()); + } + + unittest::TempDir Temp("ondiskcas", /*Unique=*/true); + std::unique_ptr<OnDiskGraphDB> DB; + ASSERT_THAT_ERROR(OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType), + std::move(UpstreamDB), + OnDiskGraphDB::FaultInPolicy::FullTree) + .moveInto(DB), + Succeeded()); + + { + std::optional<ObjectID> IDRoot; + ASSERT_THAT_ERROR(DB->getReference(RootHash).moveInto(IDRoot), Succeeded()); + std::optional<ondisk::ObjectHandle> Obj; + ASSERT_THAT_ERROR(DB->load(*IDRoot).moveInto(Obj), Succeeded()); + ASSERT_TRUE(Obj.has_value()); + EXPECT_EQ(toStringRef(DB->getObjectData(*Obj)), "root"); + auto Refs = DB->getObjectRefs(*Obj); + ASSERT_EQ(std::distance(Refs.begin(), Refs.end()), 2); + } + + // Re-open the primary without chaining, to verify the data were copied from + // the upstream. + ASSERT_THAT_ERROR(OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType), + /*UpstreamDB=*/nullptr, + OnDiskGraphDB::FaultInPolicy::FullTree) + .moveInto(DB), + Succeeded()); + + std::optional<ObjectID> IDRoot; + ASSERT_THAT_ERROR(DB->getReference(RootHash).moveInto(IDRoot), Succeeded()); + std::string PrintedTree; + raw_string_ostream OS(PrintedTree); + ASSERT_THAT_ERROR(printTree(*DB, *IDRoot, OS), Succeeded()); + StringRef Expected = R"(root + 1 + 11 + 12 + 121 + 2 + 12 + 121 + 21 + 22 +)"; + EXPECT_EQ(PrintedTree, Expected); +} + +TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInPolicyConflict) { + auto tryFaultInPolicyConflict = [](OnDiskGraphDB::FaultInPolicy Policy1, + OnDiskGraphDB::FaultInPolicy Policy2) { + unittest::TempDir TempUpstream("ondiskcas-upstream", /*Unique=*/true); + std::unique_ptr<OnDiskGraphDB> UpstreamDB; + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(TempUpstream.path(), "blake3", sizeof(HashType)) + .moveInto(UpstreamDB), + Succeeded()); + + unittest::TempDir Temp("ondiskcas", /*Unique=*/true); + std::unique_ptr<OnDiskGraphDB> DB; + ASSERT_THAT_ERROR(OnDiskGraphDB::open(Temp.path(), "blake3", + sizeof(HashType), + std::move(UpstreamDB), Policy1) + .moveInto(DB), + Succeeded()); + DB.reset(); + ASSERT_THAT_ERROR(OnDiskGraphDB::open(Temp.path(), "blake3", + sizeof(HashType), + std::move(UpstreamDB), Policy2) + .moveInto(DB), + Failed()); + }; + // Open as 'single', then as 'full'. + tryFaultInPolicyConflict(OnDiskGraphDB::FaultInPolicy::SingleNode, + OnDiskGraphDB::FaultInPolicy::FullTree); + // Open as 'full', then as 'single'. + tryFaultInPolicyConflict(OnDiskGraphDB::FaultInPolicy::FullTree, + OnDiskGraphDB::FaultInPolicy::SingleNode); +} + +#if defined(EXPENSIVE_CHECKS) +TEST_F(OnDiskCASTest, OnDiskGraphDBSpaceLimit) { + setMaxOnDiskCASMappingSize(); + unittest::TempDir Temp("ondiskcas", /*Unique=*/true); + std::unique_ptr<OnDiskGraphDB> DB; + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType)).moveInto(DB), + Succeeded()); + + std::optional<ObjectID> ID; + std::string Data(500, '0'); + auto storeSmallObject = [&]() { + SmallVector<ObjectID, 1> Refs; + if (ID) + Refs.push_back(*ID); + ASSERT_THAT_ERROR(store(*DB, Data, Refs).moveInto(ID), Succeeded()); + }; + + // Insert enough small elements to overflow the data pool. + for (unsigned I = 0; I < 1024 * 256; ++I) + storeSmallObject(); + + EXPECT_GE(DB->getHardStorageLimitUtilization(), 99U); +} +#endif diff --git a/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp b/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp new file mode 100644 index 0000000..89c03b8 --- /dev/null +++ b/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp @@ -0,0 +1,77 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/OnDiskKeyValueDB.h" +#include "CASTestConfig.h" +#include "OnDiskCommonUtils.h" +#include "llvm/Testing/Support/Error.h" +#include "llvm/Testing/Support/SupportHelpers.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; +using namespace llvm::unittest::cas; + +TEST_F(OnDiskCASTest, OnDiskKeyValueDBTest) { + unittest::TempDir Temp("ondiskkv", /*Unique=*/true); + std::unique_ptr<OnDiskKeyValueDB> DB; + ASSERT_THAT_ERROR(OnDiskKeyValueDB::open(Temp.path(), "blake3", + sizeof(HashType), "test", + sizeof(ValueType)) + .moveInto(DB), + Succeeded()); + { + std::optional<ArrayRef<char>> Val; + ASSERT_THAT_ERROR(DB->get(digest("hello")).moveInto(Val), Succeeded()); + EXPECT_FALSE(Val.has_value()); + } + + ValueType ValW = valueFromString("world"); + ArrayRef<char> Val; + ASSERT_THAT_ERROR(DB->put(digest("hello"), ValW).moveInto(Val), Succeeded()); + EXPECT_EQ(Val, ArrayRef(ValW)); + ASSERT_THAT_ERROR( + DB->put(digest("hello"), valueFromString("other")).moveInto(Val), + Succeeded()); + EXPECT_EQ(Val, ArrayRef(ValW)); + + { + std::optional<ArrayRef<char>> Val; + ASSERT_THAT_ERROR(DB->get(digest("hello")).moveInto(Val), Succeeded()); + EXPECT_TRUE(Val.has_value()); + EXPECT_EQ(*Val, ArrayRef(ValW)); + } + + // Validate + { + auto ValidateFunc = [](FileOffset Offset, ArrayRef<char> Data) -> Error { + EXPECT_EQ(Data.size(), sizeof(ValueType)); + return Error::success(); + }; + ASSERT_THAT_ERROR(DB->validate(ValidateFunc), Succeeded()); + } + + // Size + { + size_t InitSize = DB->getStorageSize(); + unsigned InitPrecent = DB->getHardStorageLimitUtilization(); + + // Insert a lot of entries. + for (unsigned I = 0; I < 1024 * 100; ++I) { + std::string Index = Twine(I).str(); + ArrayRef<char> Val; + ASSERT_THAT_ERROR( + DB->put(digest(Index), valueFromString(Index)).moveInto(Val), + Succeeded()); + } + + EXPECT_GT(DB->getStorageSize(), InitSize); + EXPECT_GT(DB->getHardStorageLimitUtilization(), InitPrecent); + } +} diff --git a/llvm/utils/lit/tests/Inputs/googletest-cmd-wrapper/lit.cfg b/llvm/utils/lit/tests/Inputs/googletest-cmd-wrapper/lit.cfg index 9f93bac..d3eb987 100644 --- a/llvm/utils/lit/tests/Inputs/googletest-cmd-wrapper/lit.cfg +++ b/llvm/utils/lit/tests/Inputs/googletest-cmd-wrapper/lit.cfg @@ -2,5 +2,5 @@ import lit.formats config.name = "googletest-cmd-wrapper" config.test_format = lit.formats.GoogleTest( - "DummySubDir", "Test" if "win32" in sys.platform else ".exe", [sys.executable] + "DummySubDir", "Test" if sys.platform in ["win32", "cygwin"] else ".exe", [sys.executable] ) diff --git a/mlir/docs/DialectConversion.md b/mlir/docs/DialectConversion.md index 5ae3515..b73aa04 100644 --- a/mlir/docs/DialectConversion.md +++ b/mlir/docs/DialectConversion.md @@ -153,11 +153,11 @@ target.markOpRecursivelyLegal<MyOp>([](MyOp op) { ... }); After the conversion target has been defined, a set of legalization patterns must be provided to transform illegal operations into legal ones. The patterns -supplied here have the same structure and restrictions as those described in the -main [Pattern](PatternRewriter.md) documentation. The patterns provided do not -need to generate operations that are directly legal on the target. The framework -will automatically build a graph of conversions to convert non-legal operations -into a set of legal ones. +supplied here have the same structure and similar restrictions as those +described in the main [Pattern](PatternRewriter.md) documentation. The patterns +provided do not need to generate operations that are directly legal on the +target. The framework will automatically build a graph of conversions to convert +non-legal operations into a set of legal ones. As an example, say you define a target that supports one operation: `foo.add`. When providing the following patterns: [`bar.add` -> `baz.add`, `baz.add` -> @@ -171,23 +171,12 @@ means that you don’t have to define a direct legalization pattern for `bar.add Along with the general `RewritePattern` classes, the conversion framework provides a special type of rewrite pattern that can be used when a pattern relies on interacting with constructs specific to the conversion process, the -`ConversionPattern`. For example, the conversion process does not necessarily -update operations in-place and instead creates a mapping of events such as -replacements and erasures, and only applies them when the entire conversion -process is successful. Certain classes of patterns rely on using the -updated/remapped operands of an operation, such as when the types of results -defined by an operation have changed. The general Rewrite Patterns can no longer -be used in these situations, as the types of the operands of the operation being -matched will not correspond with those expected by the user. This pattern -provides, as an additional argument to the `matchAndRewrite` method, the list -of operands that the operation should use after conversion. If an operand was -the result of a non-converted operation, for example if it was already legal, -the original operand is used. This means that the operands provided always have -a 1-1 non-null correspondence with the operands on the operation. The original -operands of the operation are still intact and may be inspected as normal. -These patterns also utilize a special `PatternRewriter`, -`ConversionPatternRewriter`, that provides special hooks for use with the -conversion infrastructure. +`ConversionPattern`. + +#### Remapped Operands / Adaptor +Conversion patterns have an additional `operands` / `adaptor` argument for the +`matchAndRewrite` method. These operands correspond to the most recent +replacement values of the respective operands of the matched operation. ```c++ struct MyConversionPattern : public ConversionPattern { @@ -200,6 +189,128 @@ struct MyConversionPattern : public ConversionPattern { }; ``` +Example: + +```mlir +%0 = "test.foo"() : () -> i1 // matched by pattern A +"test.bar"(%0) : (i1) -> () // matched by pattern B +``` + +Let's assume that the two patterns are applied back-to-back: first, pattern A +replaces `"test.foo"` with `"test.qux"`, an op that has a different result +type. The dialect conversion infrastructure has special support for such +type-changing IR modifications. + +```mlir +%0 = "test.qux"() : () -> i2 +%r = builtin.unrealized_conversion_cast %0 : i2 to i1 +"test.bar"(%r) : (i1) -> () +``` + +Simply swapping out the operand of `"test.bar"` during the `replaceOp` call +would be unsafe, because that would change the type of operand and, therefore, +potentially the semantics of the operation. Instead, the dialect conversion +driver (conceptually) inserts a `builtin.unrealized_conversion_cast` op that +connects the newly created `"test.qux"` op with the `"test.bar"` op, without +changing the types of the latter one. + +Now, the second pattern B is applied. The `operands` argument contains an SSA +value with the most recent replacement type (`%0` with type `i2`), whereas +querying the operand from the matched op still returns an SSA value with the +original operand type `i1`. + +Note: If the conversion pattern is instantiated with a type converter, the +`operands` argument contains SSA values whose types match the legalized operand +types as per the type converter. See [Type Safety](#type-safety) for more +details. + +Note: The dialect conversion framework does not guarantee the presence of any +particular value in the `operands` argument. The only thing that's guaranteed +is the type of the `operands` SSA values. E.g., instead of the actual +replacement values supplied to a `replaceOp` API call, `operands` may contain +results of transitory `builtin.unrealized_conversion_cast` ops that were +inserted by the conversion driver but typically fold away again throughout the +conversion process. + +#### Immediate vs. Delayed IR Modification + +The dialect conversion driver can operate in two modes: (a) rollback mode +(default) and (b) no-rollback mode. This can be controlled by +`ConversionConfig::allowPatternRollback`. When running in rollback mode, the +driver is able backtrack and roll back already applied patterns when the +current legalization path (sequence of pattern applications) gets stuck with +unlegalizable operations. + +When running in no-rollback mode, all IR modifications such as op replacement, +op erasure, op insertion or in-place op modification are applied immediately. + +When running in rollback mode, certain IR modifications are delayed to the end +of the conversion process. For example, a `ConversionPatternRewriter::eraseOp` +API call does not immediately erase the op, but just marks it for erasure. The +op will stay visible to patterns and IR traversals throughout the conversion +process. As another example, `replaceOp` and `replaceAllUsesWith` does not +immediately update users of the original SSA values. This step is also delayed +to the end of the conversion process. + +Delaying certain IR modifications has two benefits: (1) pattern rollback is +simpler because fewer IR modifications must be rolled back, (2) pointers of +erased operations / blocks are preserved upon rollback, and (3) patterns can +still access/traverse the original IR to some degree. However, additional +bookkeeping in the form of complex internal C++ data structures is required to +support pattern rollback. Running in rollback mode has a significant toll on +compilation time, is error-prone and makes debugging conversion passes more +complicated. Therefore, programmers are encouraged to run in no-rollback mode +when possible. + +The following table gives an overview of which IR changes are applied in a +delayed fasion in rollback mode. + +| Type | Rollback Mode | No-rollback Mode | +| ------------------------------------------------------- | ----------------- | ---------------- | +| Op Insertion / Movement (`create`/`insert`) | Immediate | Immediate | +| Op Replacement (`replaceOp`) | Delayed | Immediate | +| Op Erasure (`eraseOp`) | Delayed | Immediate | +| Op Modification (`modifyOpInPlace`) | Immediate | Immediate | +| Value Replacement (`replaceAllUsesWith`) | Delayed | Immediate | +| Block Insertion (`createBlock`) | Immediate | Immediate | +| Block Replacement | Not supported | Not supported | +| Block Erasure | Partly delayed | Immediate | +| Block Signature Conversion (`applySignatureConversion`) | Partially delayed | Immediate | +| Region / Block Inlining (`inlineBlockBefore`, etc.) | Partially delayed | Immediate | + +Value replacement is delayed and has different semantics in rollback mode: +Since the actual replacement is delayed to the end of the conversion process, +additional uses of the replaced value can be created after the +`replaceAllUsesWith` call. Those uses will also be replaced at the end of the +conversion process. + +Block replacement is not supported in either mode, because the rewriter +infrastructure currently has no API for replacing blocks: there is no overload +of `replaceAllUsesWith` that accepts `Block *`. + +Block erasure is partly delayed in rollback mode: the block is detached from +the IR graph, but not memory for the block is not released until the end of the +conversion process. This mechanism ensures that block pointers do not change +when a block erasure is rolled back. + +Block signature conversion is a combination of block insertion, op insertion, +value replacement and block erasure. In rollback mode, the first two steps are +immediate, but the last two steps are delayed. + +Region / block inlining is a combination of block / op insertion and +(optionally) value replacement. In rollback mode, the insertion steps are +immediate, but the replacement step is delayed. + +Note: When running in rollback mode, the conversion driver inserts fewer +transitory `builtin.unrealized_conversion_cast` ops. Such ops are needed less +frequently because certain IR modifications are delayed, making it unnecessary +to connect old (not yet rewritten) and new (already rewritten) IR in a +type-safe way. This has a negative effect on the debugging experience: when +dumping IR throughout the conversion process, users see a mixture of old and +new IR, but the way they are connected is not always visibile in the IR. Some +of that information is stored in internal C++ data structures that is not +visibile during an IR dump. + #### Type Safety The types of the remapped operands provided to a conversion pattern (through diff --git a/mlir/docs/Dialects/GPU.md b/mlir/docs/Dialects/GPU.md index 8d4d2ca..c16ed57 100644 --- a/mlir/docs/Dialects/GPU.md +++ b/mlir/docs/Dialects/GPU.md @@ -121,7 +121,7 @@ func.func @main() { gpu.launch blocks(%0, %1, %2) in (%3 = %c1, %4 = %c1, %5 = %c1) threads(%6, %7, %8) in (%9 = %c2, %10 = %c1, %11 = %c1) { - gpu.printf "Hello from %d\n" %6 : index + gpu.printf "Hello from %d\n", %6 : index gpu.terminator } return diff --git a/mlir/include/mlir-c/Target/LLVMIR.h b/mlir/include/mlir-c/Target/LLVMIR.h index b5f9489..ec5ae88 100644 --- a/mlir/include/mlir-c/Target/LLVMIR.h +++ b/mlir/include/mlir-c/Target/LLVMIR.h @@ -33,6 +33,9 @@ extern "C" { MLIR_CAPI_EXPORTED LLVMModuleRef mlirTranslateModuleToLLVMIR(MlirOperation module, LLVMContextRef context); +MLIR_CAPI_EXPORTED char * +mlirTranslateModuleToLLVMIRToString(MlirOperation module); + struct MlirTypeFromLLVMIRTranslator { void *ptr; }; diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td index 987fc13..a6c6038 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td @@ -584,7 +584,7 @@ def GPU_DynamicSharedMemoryOp : GPU_Op<"dynamic_shared_memory", [Pure]> This operation provides a memref pointer to the start of dynamic shared memory, often referred to as workgroup memory. It's important to note that this dynamic shared memory needs to be allocated at kernel launch. One can - conveniently utilize `the dynamic_shared_memory_size` parameter of + conveniently utilize the `dynamic_shared_memory_size` parameter of `gpu.launch` for this purpose. Examples: diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h index b8aa497..e2a60f5 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h @@ -152,12 +152,6 @@ mlir::ValueRange getDataOperands(mlir::Operation *accOp); /// Used to get a mutable range iterating over the data operands. mlir::MutableOperandRange getMutableDataOperands(mlir::Operation *accOp); -/// Used to obtain the enclosing compute construct operation that contains -/// the provided `region`. Returns nullptr if no compute construct operation -/// is found. The returns operation is one of types defined by -///`ACC_COMPUTE_CONSTRUCT_OPS`. -mlir::Operation *getEnclosingComputeOp(mlir::Region ®ion); - /// Used to check whether the provided `type` implements the `PointerLikeType` /// interface. inline bool isPointerLikeType(mlir::Type type) { diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index 1eaa21b4..e78cdbe 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -2787,10 +2787,14 @@ def AtomicReadOp : OpenACC_Op<"atomic.read", [AtomicReadOpInterface]> { let arguments = (ins OpenACC_PointerLikeType:$x, OpenACC_PointerLikeType:$v, - TypeAttr:$element_type); + TypeAttr:$element_type, Optional<I1>:$ifCond); let assemblyFormat = [{ + oilist( + `if` `(` $ifCond `)` + ) $v `=` $x - `:` type($v) `,` type($x) `,` $element_type attr-dict + `:` type($v) `,` type($x) `,` $element_type + attr-dict }]; let hasVerifier = 1; } @@ -2809,8 +2813,12 @@ def AtomicWriteOp : OpenACC_Op<"atomic.write",[AtomicWriteOpInterface]> { }]; let arguments = (ins OpenACC_PointerLikeType:$x, - AnyType:$expr); + AnyType:$expr, + Optional<I1>:$ifCond); let assemblyFormat = [{ + oilist( + `if` `(` $ifCond `)` + ) $x `=` $expr `:` type($x) `,` type($expr) attr-dict @@ -2850,10 +2858,15 @@ def AtomicUpdateOp : OpenACC_Op<"atomic.update", let arguments = (ins Arg<OpenACC_PointerLikeType, "Address of variable to be updated", - [MemRead, MemWrite]>:$x); + [MemRead, MemWrite]>:$x, + Optional<I1>:$ifCond); let regions = (region SizedRegion<1>:$region); let assemblyFormat = [{ - $x `:` type($x) $region attr-dict + oilist( + `if` `(` $ifCond `)` + ) + $x `:` type($x) + $region attr-dict }]; let hasVerifier = 1; let hasRegionVerifier = 1; @@ -2896,8 +2909,13 @@ def AtomicCaptureOp : OpenACC_Op<"atomic.capture", }]; + let arguments = (ins Optional<I1>:$ifCond); + let regions = (region SizedRegion<1>:$region); let assemblyFormat = [{ + oilist( + `if` `(` $ifCond `)` + ) $region attr-dict }]; let hasRegionVerifier = 1; diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCUtils.h b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtils.h new file mode 100644 index 0000000..378f434 --- /dev/null +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtils.h @@ -0,0 +1,44 @@ +//===- OpenACCUtils.h - OpenACC Utilities -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_OPENACC_OPENACCUTILS_H_ +#define MLIR_DIALECT_OPENACC_OPENACCUTILS_H_ + +#include "mlir/Dialect/OpenACC/OpenACC.h" + +namespace mlir { +namespace acc { + +/// Used to obtain the enclosing compute construct operation that contains +/// the provided `region`. Returns nullptr if no compute construct operation +/// is found. The returned operation is one of types defined by +/// `ACC_COMPUTE_CONSTRUCT_OPS`. +mlir::Operation *getEnclosingComputeOp(mlir::Region ®ion); + +/// Returns true if this value is only used by `acc.private` operations in the +/// `region`. +bool isOnlyUsedByPrivateClauses(mlir::Value val, mlir::Region ®ion); + +/// Returns true if this value is only used by `acc.reduction` operations in +/// the `region`. +bool isOnlyUsedByReductionClauses(mlir::Value val, mlir::Region ®ion); + +/// Looks for an OpenACC default attribute on the current operation `op` or in +/// a parent operation which encloses `op`. This is useful because OpenACC +/// specification notes that a visible default clause is the nearest default +/// clause appearing on the compute construct or a lexically containing data +/// construct. +std::optional<ClauseDefaultValue> getDefaultAttr(mlir::Operation *op); + +/// Get the type category of an OpenACC variable. +mlir::acc::VariableTypeCategory getTypeCategory(mlir::Value var); + +} // namespace acc +} // namespace mlir + +#endif // MLIR_DIALECT_OPENACC_OPENACCUTILS_H_ diff --git a/mlir/lib/Bindings/Python/DialectLLVM.cpp b/mlir/lib/Bindings/Python/DialectLLVM.cpp index 38de4a0..870a713 100644 --- a/mlir/lib/Bindings/Python/DialectLLVM.cpp +++ b/mlir/lib/Bindings/Python/DialectLLVM.cpp @@ -11,6 +11,7 @@ #include "mlir-c/Dialect/LLVM.h" #include "mlir-c/IR.h" #include "mlir-c/Support.h" +#include "mlir-c/Target/LLVMIR.h" #include "mlir/Bindings/Python/Diagnostics.h" #include "mlir/Bindings/Python/Nanobind.h" #include "mlir/Bindings/Python/NanobindAdaptors.h" @@ -24,7 +25,7 @@ using namespace mlir; using namespace mlir::python; using namespace mlir::python::nanobind_adaptors; -static void populateDialectLLVMSubmodule(const nanobind::module_ &m) { +static void populateDialectLLVMSubmodule(nanobind::module_ &m) { //===--------------------------------------------------------------------===// // StructType @@ -154,6 +155,16 @@ static void populateDialectLLVMSubmodule(const nanobind::module_ &m) { .def_property_readonly("address_space", [](MlirType type) { return mlirLLVMPointerTypeGetAddressSpace(type); }); + + m.def( + "translate_module_to_llvmir", + [](MlirOperation module) { + return mlirTranslateModuleToLLVMIRToString(module); + }, + // clang-format off + nb::sig("def translate_module_to_llvmir(module: " MAKE_MLIR_PYTHON_QUALNAME("ir.Operation") ") -> str"), + // clang-format on + "module"_a, nb::rv_policy::take_ownership); } NB_MODULE(_mlirDialectsLLVM, m) { diff --git a/mlir/lib/Bindings/Python/IRAttributes.cpp b/mlir/lib/Bindings/Python/IRAttributes.cpp index 045c0fb..c0a945e 100644 --- a/mlir/lib/Bindings/Python/IRAttributes.cpp +++ b/mlir/lib/Bindings/Python/IRAttributes.cpp @@ -1306,6 +1306,10 @@ PyType_Slot PyDenseElementsAttribute::slots[] = { e.restore(); nb::chain_error(PyExc_BufferError, "Error converting attribute to buffer"); return -1; + } catch (std::exception &e) { + nb::chain_error(PyExc_BufferError, + "Error converting attribute to buffer: %s", e.what()); + return -1; } view->obj = obj; view->ndim = 1; diff --git a/mlir/lib/CAPI/Target/LLVMIR.cpp b/mlir/lib/CAPI/Target/LLVMIR.cpp index 1c1912a..00229df 100644 --- a/mlir/lib/CAPI/Target/LLVMIR.cpp +++ b/mlir/lib/CAPI/Target/LLVMIR.cpp @@ -34,6 +34,15 @@ LLVMModuleRef mlirTranslateModuleToLLVMIR(MlirOperation module, return moduleRef; } +char *mlirTranslateModuleToLLVMIRToString(MlirOperation module) { + LLVMContextRef llvmCtx = LLVMContextCreate(); + LLVMModuleRef llvmModule = mlirTranslateModuleToLLVMIR(module, llvmCtx); + char *llvmir = LLVMPrintModuleToString(llvmModule); + LLVMDisposeModule(llvmModule); + LLVMContextDispose(llvmCtx); + return llvmir; +} + DEFINE_C_API_PTR_METHODS(MlirTypeFromLLVMIRTranslator, mlir::LLVM::TypeFromLLVMIRTranslator) diff --git a/mlir/lib/Dialect/OpenACC/CMakeLists.txt b/mlir/lib/Dialect/OpenACC/CMakeLists.txt index 9f57627..7117520 100644 --- a/mlir/lib/Dialect/OpenACC/CMakeLists.txt +++ b/mlir/lib/Dialect/OpenACC/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(IR) +add_subdirectory(Utils) add_subdirectory(Transforms) diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index dcfe2c7..5ca0100 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -3842,7 +3842,8 @@ LogicalResult AtomicUpdateOp::canonicalize(AtomicUpdateOp op, } if (Value writeVal = op.getWriteOpVal()) { - rewriter.replaceOpWithNewOp<AtomicWriteOp>(op, op.getX(), writeVal); + rewriter.replaceOpWithNewOp<AtomicWriteOp>(op, op.getX(), writeVal, + op.getIfCond()); return success(); } @@ -4649,14 +4650,3 @@ mlir::acc::getMutableDataOperands(mlir::Operation *accOp) { .Default([&](mlir::Operation *) { return nullptr; })}; return dataOperands; } - -mlir::Operation *mlir::acc::getEnclosingComputeOp(mlir::Region ®ion) { - mlir::Operation *parentOp = region.getParentOp(); - while (parentOp) { - if (mlir::isa<ACC_COMPUTE_CONSTRUCT_OPS>(parentOp)) { - return parentOp; - } - parentOp = parentOp->getParentOp(); - } - return nullptr; -} diff --git a/mlir/lib/Dialect/OpenACC/Utils/CMakeLists.txt b/mlir/lib/Dialect/OpenACC/Utils/CMakeLists.txt new file mode 100644 index 0000000..68e1246 --- /dev/null +++ b/mlir/lib/Dialect/OpenACC/Utils/CMakeLists.txt @@ -0,0 +1,20 @@ +add_mlir_dialect_library(MLIROpenACCUtils + OpenACCUtils.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/OpenACC + + DEPENDS + MLIROpenACCPassIncGen + MLIROpenACCOpsIncGen + MLIROpenACCEnumsIncGen + MLIROpenACCAttributesIncGen + MLIROpenACCMPOpsInterfacesIncGen + MLIROpenACCOpsInterfacesIncGen + MLIROpenACCTypeInterfacesIncGen + + LINK_LIBS PUBLIC + MLIROpenACCDialect + MLIRIR + MLIRSupport +) diff --git a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtils.cpp b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtils.cpp new file mode 100644 index 0000000..1223325 --- /dev/null +++ b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtils.cpp @@ -0,0 +1,80 @@ +//===- OpenACCUtils.cpp ---------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/OpenACC/OpenACCUtils.h" + +#include "mlir/Dialect/OpenACC/OpenACC.h" +#include "llvm/ADT/TypeSwitch.h" + +mlir::Operation *mlir::acc::getEnclosingComputeOp(mlir::Region ®ion) { + mlir::Operation *parentOp = region.getParentOp(); + while (parentOp) { + if (mlir::isa<ACC_COMPUTE_CONSTRUCT_OPS>(parentOp)) + return parentOp; + parentOp = parentOp->getParentOp(); + } + return nullptr; +} + +template <typename OpTy> +static bool isOnlyUsedByOpClauses(mlir::Value val, mlir::Region ®ion) { + auto checkIfUsedOnlyByOpInside = [&](mlir::Operation *user) { + // For any users which are not in the current acc region, we can ignore. + // Return true so that it can be used in a `all_of` check. + if (!region.isAncestor(user->getParentRegion())) + return true; + return mlir::isa<OpTy>(user); + }; + + return llvm::all_of(val.getUsers(), checkIfUsedOnlyByOpInside); +} + +bool mlir::acc::isOnlyUsedByPrivateClauses(mlir::Value val, + mlir::Region ®ion) { + return isOnlyUsedByOpClauses<mlir::acc::PrivateOp>(val, region); +} + +bool mlir::acc::isOnlyUsedByReductionClauses(mlir::Value val, + mlir::Region ®ion) { + return isOnlyUsedByOpClauses<mlir::acc::ReductionOp>(val, region); +} + +std::optional<mlir::acc::ClauseDefaultValue> +mlir::acc::getDefaultAttr(Operation *op) { + std::optional<mlir::acc::ClauseDefaultValue> defaultAttr; + Operation *currOp = op; + + // Iterate outwards until a default clause is found (since OpenACC + // specification notes that a visible default clause is the nearest default + // clause appearing on the compute construct or a lexically containing data + // construct. + while (!defaultAttr.has_value() && currOp) { + defaultAttr = + llvm::TypeSwitch<mlir::Operation *, + std::optional<mlir::acc::ClauseDefaultValue>>(currOp) + .Case<ACC_COMPUTE_CONSTRUCT_OPS, mlir::acc::DataOp>( + [&](auto op) { return op.getDefaultAttr(); }) + .Default([&](Operation *) { return std::nullopt; }); + currOp = currOp->getParentOp(); + } + + return defaultAttr; +} + +mlir::acc::VariableTypeCategory mlir::acc::getTypeCategory(mlir::Value var) { + mlir::acc::VariableTypeCategory typeCategory = + mlir::acc::VariableTypeCategory::uncategorized; + if (auto mappableTy = dyn_cast<mlir::acc::MappableType>(var.getType())) + typeCategory = mappableTy.getTypeCategory(var); + else if (auto pointerLikeTy = + dyn_cast<mlir::acc::PointerLikeType>(var.getType())) + typeCategory = pointerLikeTy.getPointeeTypeCategory( + cast<TypedValue<mlir::acc::PointerLikeType>>(var), + pointerLikeTy.getElementType()); + return typeCategory; +} diff --git a/mlir/lib/Dialect/Quant/IR/TypeParser.cpp b/mlir/lib/Dialect/Quant/IR/TypeParser.cpp index 6e79daa..eb956f2 100644 --- a/mlir/lib/Dialect/Quant/IR/TypeParser.cpp +++ b/mlir/lib/Dialect/Quant/IR/TypeParser.cpp @@ -157,7 +157,7 @@ static Type parseAnyType(DialectAsmParser &parser) { /// Checks if the given scale value is within the valid range of the expressed /// type. The `expressedType` argument is the floating-point type used for /// expressing the quantized values, and `scale` is the double value to check. -LogicalResult +static LogicalResult isScaleInExpressedTypeRange(function_ref<InFlightDiagnostic()> emitError, Type expressedType, double scale) { auto floatType = cast<FloatType>(expressedType); @@ -579,10 +579,10 @@ static void printUniformQuantizedPerAxisType(UniformQuantizedPerAxisType type, /// would print: /// /// {{1.0, 2.0}, {3.0:1, 4.0:9}} -void printDenseQuantizationParameters(ArrayRef<APFloat> scales, - ArrayRef<APInt> zeroPoints, - ArrayRef<int64_t> shape, - DialectAsmPrinter &out) { +static void printDenseQuantizationParameters(ArrayRef<APFloat> scales, + ArrayRef<APInt> zeroPoints, + ArrayRef<int64_t> shape, + DialectAsmPrinter &out) { int64_t rank = shape.size(); SmallVector<unsigned, 4> counter(rank, 0); unsigned openBrackets = 0; diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp index e3717aa..c7588b4 100644 --- a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp @@ -240,10 +240,10 @@ LogicalResult mlir::scf::peelForLoopFirstIteration(RewriterBase &b, ForOp forOp, loc, forOp.getUpperBound().getType(), splitBound); // Peel the first iteration. - IRMapping map; - map.map(forOp.getUpperBound(), splitBound); - firstIteration = cast<ForOp>(b.clone(*forOp.getOperation(), map)); - + firstIteration = cast<ForOp>(b.clone(*forOp.getOperation())); + b.modifyOpInPlace(firstIteration, [&]() { + firstIteration.getUpperBoundMutable().assign(splitBound); + }); // Update main loop with new lower bound. b.modifyOpInPlace(forOp, [&]() { forOp.getInitArgsMutable().assign(firstIteration->getResults()); diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td b/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td index 39fbab8..e8d2274 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td @@ -75,11 +75,3 @@ def ConvertComparisonIntoClamp2_#CmpClampPair[0] : Pat< )), (CmpClampPair[1] $input, $min, $max)>; } - -//===----------------------------------------------------------------------===// -// spirv.GL.Length -> spirv.GL.FAbs -//===----------------------------------------------------------------------===// - -def ConvertGLLengthToGLFAbs : Pat< - (SPIRV_GLLengthOp SPIRV_Float:$operand), - (SPIRV_GLFAbsOp $operand)>; diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVGLCanonicalization.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVGLCanonicalization.cpp index 46acb8c..3ad8057 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVGLCanonicalization.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVGLCanonicalization.cpp @@ -34,8 +34,8 @@ void populateSPIRVGLCanonicalizationPatterns(RewritePatternSet &results) { ConvertComparisonIntoClamp2_SPIRV_SLessThanOp, ConvertComparisonIntoClamp2_SPIRV_SLessThanEqualOp, ConvertComparisonIntoClamp2_SPIRV_ULessThanOp, - ConvertComparisonIntoClamp2_SPIRV_ULessThanEqualOp, - ConvertGLLengthToGLFAbs>(results.getContext()); + ConvertComparisonIntoClamp2_SPIRV_ULessThanEqualOp>( + results.getContext()); } } // namespace spirv } // namespace mlir diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt index ffa96ad..84ff08a 100644 --- a/mlir/python/CMakeLists.txt +++ b/mlir/python/CMakeLists.txt @@ -601,6 +601,8 @@ declare_mlir_python_extension(MLIRPythonExtension.Dialects.LLVM.Pybind EMBED_CAPI_LINK_LIBS MLIRCAPIIR MLIRCAPILLVM + # Misnomer - this is only the LLVMIR translation target. + MLIRCAPITarget ) declare_mlir_python_extension(MLIRPythonExtension.Dialects.Quant.Pybind diff --git a/mlir/python/mlir/dialects/gpu/__init__.py b/mlir/python/mlir/dialects/gpu/__init__.py index b14ea68..2fbcbb0 100644 --- a/mlir/python/mlir/dialects/gpu/__init__.py +++ b/mlir/python/mlir/dialects/gpu/__init__.py @@ -6,7 +6,7 @@ from .._gpu_ops_gen import * from .._gpu_ops_gen import _Dialect from .._gpu_enum_gen import * from ..._mlir_libs._mlirDialectsGPU import * -from typing import Callable, Sequence, Union, Optional, List +from typing import Any, Callable, Sequence, Tuple, Union, Optional, List try: from ...ir import ( @@ -21,15 +21,24 @@ try: DictAttr, Attribute, DenseI32ArrayAttr, + Value, ) + from ...extras.meta import region_op + from ...extras import types as T + from ..arith import constant, ConstantOp from .._ods_common import ( get_default_loc_context as _get_default_loc_context, _cext as _ods_cext, + get_op_result_or_op_results, ) except ImportError as e: raise RuntimeError("Error loading imports from extension module") from e +def gpu_async_token(): + return Type.parse("!gpu.async.token") + + @_ods_cext.register_operation(_Dialect, replace=True) class GPUFuncOp(GPUFuncOp): __doc__ = GPUFuncOp.__doc__ @@ -151,3 +160,176 @@ class GPUFuncOp(GPUFuncOp): @property def arguments(self) -> Sequence[Type]: return self.function_type.value.inputs + + +def _convert_literal_to_constant(value: Union[int, ConstantOp, Value]) -> Value: + if isinstance(value, int): + return constant(T.index(), value) + elif isinstance(value, (ConstantOp, Value)): + return value + else: + raise ValueError(f"Invalid value: {value}") + + +@_ods_cext.register_operation(_Dialect, replace=True) +class LaunchFuncOp(LaunchFuncOp): + __doc__ = LaunchFuncOp.__doc__ + + def __init__( + self, + kernel: List[str], + grid_size: Tuple[Any, Any, Any], + block_size: Tuple[Any, Any, Any], + kernel_operands: Optional[List[Value]] = None, + async_dependencies: Optional[List[Value]] = None, + dynamic_shared_memory_size: Optional[Value] = None, + async_object=None, + *, + loc=None, + ip=None, + ): + if async_dependencies is None: + async_dependencies = [] + async_token = None + if len(async_dependencies): + async_token = gpu_async_token() + + grid_size_x, grid_size_y, grid_size_z = map( + _convert_literal_to_constant, grid_size + ) + block_size_x, block_size_y, block_size_z = map( + _convert_literal_to_constant, block_size + ) + + super().__init__( + async_token, + async_dependencies, + kernel, + grid_size_x, + grid_size_y, + grid_size_z, + block_size_x, + block_size_y, + block_size_z, + kernel_operands, + dynamicSharedMemorySize=dynamic_shared_memory_size, + asyncObject=async_object, + loc=loc, + ip=ip, + ) + + +def launch_func( + kernel: List[str], + grid_size: Tuple[Any, Any, Any], + block_size: Tuple[Any, Any, Any], + kernel_operands: Optional[List[Value]] = None, + async_dependencies: Optional[List[Value]] = None, + dynamic_shared_memory_size: Optional[Value] = None, + async_object=None, + *, + loc=None, + ip=None, +) -> Union[Value, List[Value], LaunchFuncOp]: + op = LaunchFuncOp( + kernel=kernel, + grid_size=grid_size, + block_size=block_size, + kernel_operands=kernel_operands, + async_dependencies=async_dependencies, + dynamic_shared_memory_size=dynamic_shared_memory_size, + async_object=async_object, + loc=loc, + ip=ip, + ) + results = op.results + if len(results) == 1: + return results[0] + elif len(results) > 1: + return results + else: + return op + + +def wait( + async_dependencies: Optional[List[Value]] = None, *, loc=None, ip=None +) -> Union[Value, List[Value], WaitOp]: + if async_dependencies is None: + async_dependencies = [] + return get_op_result_or_op_results( + WaitOp(gpu_async_token(), async_dependencies, loc=loc, ip=ip) + ) + + +@_ods_cext.register_operation(_Dialect, replace=True) +class LaunchOp(LaunchOp): + __doc__ = LaunchOp.__doc__ + + def __init__( + self, + grid_size: Tuple[Any, Any, Any], + block_size: Tuple[Any, Any, Any], + async_dependencies=None, + dynamic_shared_memory_size: Optional[Value] = None, + *, + loc=None, + ip=None, + ): + if async_dependencies is None: + async_dependencies = [] + async_token = None + if len(async_dependencies): + async_token = gpu_async_token() + grid_size_x, grid_size_y, grid_size_z = map( + _convert_literal_to_constant, grid_size + ) + block_size_x, block_size_y, block_size_z = map( + _convert_literal_to_constant, block_size + ) + + super().__init__( + async_token, + async_dependencies, + grid_size_x, + grid_size_y, + grid_size_z, + block_size_x, + block_size_y, + block_size_z, + dynamicSharedMemorySize=dynamic_shared_memory_size, + loc=loc, + ip=ip, + ) + self.regions[0].blocks.append(*[T.index() for _ in range(12)]) + + +def launch_( + grid_size: Tuple[Any, Any, Any], + block_size: Tuple[Any, Any, Any], + async_dependencies=None, + dynamic_shared_memory_size: Optional[Value] = None, + *, + loc=None, + ip=None, +): + grid_size = tuple(map(_convert_literal_to_constant, grid_size)) + block_size = tuple(map(_convert_literal_to_constant, block_size)) + launch_op = LaunchOp( + grid_size, + block_size, + async_dependencies, + dynamic_shared_memory_size, + loc=loc, + ip=ip, + ) + return launch_op + + +launch = region_op(launch_, terminator=lambda *_args: terminator()) + + +_printf = printf + + +def printf(format, *args, loc=None, ip=None): + return _printf(format=format, args=args, loc=loc, ip=ip) diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir index 1484d7e..8713689 100644 --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -1766,6 +1766,12 @@ acc.set default_async(%i32Value : i32) func.func @acc_atomic_read(%v: memref<i32>, %x: memref<i32>) { // CHECK: acc.atomic.read %[[v]] = %[[x]] : memref<i32>, memref<i32>, i32 acc.atomic.read %v = %x : memref<i32>, memref<i32>, i32 + + // CHECK-NEXT: %[[IFCOND1:.*]] = arith.constant true + // CHECK-NEXT: acc.atomic.read if(%[[IFCOND1]]) %[[v]] = %[[x]] : memref<i32>, memref<i32>, i32 + %ifCond = arith.constant true + acc.atomic.read if(%ifCond) %v = %x : memref<i32>, memref<i32>, i32 + return } @@ -1776,6 +1782,12 @@ func.func @acc_atomic_read(%v: memref<i32>, %x: memref<i32>) { func.func @acc_atomic_write(%addr : memref<i32>, %val : i32) { // CHECK: acc.atomic.write %[[ADDR]] = %[[VAL]] : memref<i32>, i32 acc.atomic.write %addr = %val : memref<i32>, i32 + + // CHECK-NEXT: %[[IFCOND1:.*]] = arith.constant true + // CHECK-NEXT: acc.atomic.write if(%[[IFCOND1]]) %[[ADDR]] = %[[VAL]] : memref<i32>, i32 + %ifCond = arith.constant true + acc.atomic.write if(%ifCond) %addr = %val : memref<i32>, i32 + return } @@ -1793,6 +1805,19 @@ func.func @acc_atomic_update(%x : memref<i32>, %expr : i32, %xBool : memref<i1>, %newval = llvm.add %xval, %expr : i32 acc.yield %newval : i32 } + + // CHECK: %[[IFCOND1:.*]] = arith.constant true + // CHECK-NEXT: acc.atomic.update if(%[[IFCOND1]]) %[[X]] : memref<i32> + // CHECK-NEXT: (%[[XVAL:.*]]: i32): + // CHECK-NEXT: %[[NEWVAL:.*]] = llvm.add %[[XVAL]], %[[EXPR]] : i32 + // CHECK-NEXT: acc.yield %[[NEWVAL]] : i32 + %ifCond = arith.constant true + acc.atomic.update if (%ifCond) %x : memref<i32> { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + acc.yield %newval : i32 + } + // CHECK: acc.atomic.update %[[XBOOL]] : memref<i1> // CHECK-NEXT: (%[[XVAL:.*]]: i1): // CHECK-NEXT: %[[NEWVAL:.*]] = llvm.and %[[XVAL]], %[[EXPRBOOL]] : i1 @@ -1902,6 +1927,17 @@ func.func @acc_atomic_capture(%v: memref<i32>, %x: memref<i32>, %expr: i32) { acc.atomic.write %x = %expr : memref<i32>, i32 } + // CHECK: %[[IFCOND1:.*]] = arith.constant true + // CHECK-NEXT: acc.atomic.capture if(%[[IFCOND1]]) { + // CHECK-NEXT: acc.atomic.read %[[v]] = %[[x]] : memref<i32>, memref<i32>, i32 + // CHECK-NEXT: acc.atomic.write %[[x]] = %[[expr]] : memref<i32>, i32 + // CHECK-NEXT: } + %ifCond = arith.constant true + acc.atomic.capture if (%ifCond) { + acc.atomic.read %v = %x : memref<i32>, memref<i32>, i32 + acc.atomic.write %x = %expr : memref<i32>, i32 + } + return } diff --git a/mlir/test/Dialect/SCF/for-loop-peeling-front.mlir b/mlir/test/Dialect/SCF/for-loop-peeling-front.mlir index fe3b3e6..1737c6b 100644 --- a/mlir/test/Dialect/SCF/for-loop-peeling-front.mlir +++ b/mlir/test/Dialect/SCF/for-loop-peeling-front.mlir @@ -8,7 +8,7 @@ // CHECK-DAG: %[[C17:.*]] = arith.constant 17 : index // CHECK: %[[FIRST:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C4]] // CHECK-SAME: step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) { -// CHECK: %[[MIN:.*]] = affine.min #[[MAP]](%[[C4]], %[[IV]])[%[[C4]]] +// CHECK: %[[MIN:.*]] = affine.min #[[MAP]](%[[C17]], %[[IV]])[%[[C4]]] // CHECK: %[[CAST:.*]] = arith.index_cast %[[MIN]] : index to i32 // CHECK: %[[INIT:.*]] = arith.addi %[[ACC]], %[[CAST]] : i32 // CHECK: scf.yield %[[INIT]] @@ -35,7 +35,49 @@ func.func @fully_static_bounds() -> i32 { } return %r : i32 } +// ----- +// CHECK-LABEL: func.func @static_two_iterations_ub_used_in_loop( +// CHECK-SAME: %[[IFM1:.*]]: memref<1xi32>) -> i32 { +// CHECK: %[[C0_I32:.*]] = arith.constant 0 : i32 +// CHECK: %[[C0_IDX:.*]] = arith.constant 0 : index +// CHECK: %[[C4_IDX:.*]] = arith.constant 4 : index +// CHECK: %[[C7_IDX:.*]] = arith.constant 7 : index +// CHECK: %[[FOR1:.*]] = scf.for %[[IV1:.*]] = %[[C0_IDX]] to %[[C4_IDX]] step %[[C4_IDX]] iter_args(%[[ARG1:.*]] = %[[C0_I32]]) -> (i32) { +// CHECK: %[[AFFINE_MIN1:.*]] = affine.min #map(%[[C7_IDX]], %[[IV1]]){{\[}}%[[C4_IDX]]] +// CHECK: %[[CAST1:.*]] = arith.index_cast %[[AFFINE_MIN1]] : index to i32 +// CHECK: %[[ADDI1:.*]] = arith.addi %[[ARG1]], %[[CAST1]] : i32 +// CHECK: %[[LOAD1:.*]] = memref.load %[[IFM1]]{{\[}}%[[C7_IDX]]] : memref<1xi32> +// CHECK: %[[ADDI2:.*]] = arith.addi %[[ADDI1]], %[[LOAD1]] : i32 +// CHECK: scf.yield %[[ADDI2]] : i32 +// CHECK: } +// CHECK: %[[FOR2:.*]] = scf.for %[[IV2:.*]] = %[[C4_IDX]] to %[[C7_IDX]] step %[[C4_IDX]] iter_args(%[[ARG2:.*]] = %[[RESULT1:.*]]) -> (i32) { +// CHECK: %[[AFFINE_MIN2:.*]] = affine.min #map(%[[C7_IDX]], %[[IV2]]){{\[}}%[[C4_IDX]]] +// CHECK: %[[CAST2:.*]] = arith.index_cast %[[AFFINE_MIN2]] : index to i32 +// CHECK: %[[ADDI3:.*]] = arith.addi %[[ARG2]], %[[CAST2]] : i32 +// CHECK: %[[LOAD2:.*]] = memref.load %[[IFM1]]{{\[}}%[[C7_IDX]]] : memref<1xi32> +// CHECK: %[[ADDI4:.*]] = arith.addi %[[ADDI3]], %[[LOAD2]] : i32 +// CHECK: scf.yield %[[ADDI4]] : i32 +// CHECK: } +// CHECK: return %[[RESULT2:.*]] : i32 +// CHECK: } + +#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)> +func.func @static_two_iterations_ub_used_in_loop(%arg0: memref<1xi32>) -> i32 { + %c0_i32 = arith.constant 0 : i32 + %lb = arith.constant 0 : index + %step = arith.constant 4 : index + %ub = arith.constant 7 : index + %r = scf.for %iv = %lb to %ub step %step iter_args(%arg = %c0_i32) -> i32 { + %s = affine.min #map(%ub, %iv)[%step] + %casted = arith.index_cast %s : index to i32 + %0 = arith.addi %arg, %casted : i32 + %1 = memref.load %arg0[%ub] : memref<1xi32> + %result = arith.addi %0, %1 : i32 + scf.yield %result : i32 + } + return %r : i32 +} // ----- // CHECK-DAG: #[[MAP:.*]] = affine_map<(d0, d1)[s0] -> (4, d0 - d1)> @@ -44,7 +86,7 @@ func.func @fully_static_bounds() -> i32 { // CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[C4]] step %[[C4]] { -// CHECK: %[[MIN:.*]] = affine.min #[[MAP]](%[[C4]], %[[IV]])[%[[C4]]] +// CHECK: %[[MIN:.*]] = affine.min #[[MAP]](%[[UB]], %[[IV]])[%[[C4]]] // CHECK: %[[LOAD:.*]] = memref.load %[[MEMREF]][] // CHECK: %[[CAST:.*]] = arith.index_cast %[[MIN]] // CHECK: %[[ADD:.*]] = arith.addi %[[LOAD]], %[[CAST]] : i32 @@ -83,7 +125,7 @@ func.func @no_loop_results(%ub : index, %d : memref<i32>) { // CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[LB]], %[[STEP]]] // CHECK: %[[FIRST:.*]] = scf.for %[[IV:.*]] = %[[LB]] to %[[NEW_UB]] // CHECK-SAME: step %[[STEP]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) { -// CHECK: %[[MIN:.*]] = affine.min #[[MAP1]](%[[NEW_UB]], %[[IV]])[%[[STEP]]] +// CHECK: %[[MIN:.*]] = affine.min #[[MAP1]](%[[UB]], %[[IV]])[%[[STEP]]] // CHECK: %[[CAST:.*]] = arith.index_cast %[[MIN]] : index to i32 // CHECK: %[[ADD:.*]] = arith.addi %[[ACC]], %[[CAST]] : i32 // CHECK: scf.yield %[[ADD]] @@ -119,7 +161,7 @@ func.func @fully_dynamic_bounds(%lb : index, %ub: index, %step: index) -> i32 { // CHECK-DAG: %[[C6:.*]] = arith.constant 6 : index // CHECK: %[[FIRST:.*]] = scf.for %[[IV:.*]] = %[[C2]] to %[[C6]] // CHECK-SAME: step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) { -// CHECK: %[[MIN:.*]] = affine.min #[[MAP]](%[[C6]], %[[IV]])[%[[C4]]] +// CHECK: %[[MIN:.*]] = affine.min #[[MAP]](%[[C8]], %[[IV]])[%[[C4]]] // CHECK: %[[CAST:.*]] = arith.index_cast %[[MIN]] : index to i32 // CHECK: %[[INIT:.*]] = arith.addi %[[ACC]], %[[CAST]] : i32 // CHECK: scf.yield %[[INIT]] diff --git a/mlir/test/Dialect/SCF/for-loop-peeling.mlir b/mlir/test/Dialect/SCF/for-loop-peeling.mlir index be58548..0845766 100644 --- a/mlir/test/Dialect/SCF/for-loop-peeling.mlir +++ b/mlir/test/Dialect/SCF/for-loop-peeling.mlir @@ -35,6 +35,35 @@ func.func @fully_dynamic_bounds(%lb : index, %ub: index, %step: index) -> i32 { // ----- +// CHECK-LABEL: func.func @static_two_iterations_ub_used_in_loop( +// CHECK-SAME: %[[IFM1:.*]]: memref<1xi32>) -> i32 { +// CHECK: %[[C7_I32:.*]] = arith.constant 7 : i32 +// CHECK: %[[C7_IDX:.*]] = arith.constant 7 : index +// CHECK: %[[LOAD1:.*]] = memref.load %[[IFM1]]{{\[}}%[[C7_IDX]]] : memref<1xi32> +// CHECK: %[[ADDI1:.*]] = arith.addi %[[LOAD1]], %[[C7_I32]] : i32 +// CHECK: %[[LOAD2:.*]] = memref.load %[[IFM1]]{{\[}}%[[C7_IDX]]] : memref<1xi32> +// CHECK: %[[ADDI2:.*]] = arith.addi %[[ADDI1]], %[[LOAD2]] : i32 +// CHECK: return %[[ADDI2]] : i32 +// CHECK: } + +#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)> +func.func @static_two_iterations_ub_used_in_loop(%arg0: memref<1xi32>) -> i32 { + %c0_i32 = arith.constant 0 : i32 + %lb = arith.constant 0 : index + %step = arith.constant 4 : index + %ub = arith.constant 7 : index + %r = scf.for %iv = %lb to %ub step %step iter_args(%arg = %c0_i32) -> i32 { + %s = affine.min #map(%ub, %iv)[%step] + %casted = arith.index_cast %s : index to i32 + %0 = arith.addi %arg, %casted : i32 + %1 = memref.load %arg0[%ub] : memref<1xi32> + %result = arith.addi %0, %1 : i32 + scf.yield %result : i32 + } + return %r : i32 +} +// ----- + // CHECK: func @fully_static_bounds( // CHECK-DAG: %[[C0_I32:.*]] = arith.constant 0 : i32 // CHECK-DAG: %[[C1_I32:.*]] = arith.constant 1 : i32 diff --git a/mlir/test/Dialect/SPIRV/Transforms/gl-canonicalize.mlir b/mlir/test/Dialect/SPIRV/Transforms/gl-canonicalize.mlir index 33b8776..c1447b3 100644 --- a/mlir/test/Dialect/SPIRV/Transforms/gl-canonicalize.mlir +++ b/mlir/test/Dialect/SPIRV/Transforms/gl-canonicalize.mlir @@ -177,25 +177,3 @@ func.func @clamp_ulessthanequal(%input: i32, %min: i32, %max: i32) -> i32 { // CHECK-NEXT: spirv.ReturnValue [[RES]] spirv.ReturnValue %2 : i32 } - -// ----- - -//===----------------------------------------------------------------------===// -// spirv.GL.Length -//===----------------------------------------------------------------------===// - -// CHECK-LABEL: @convert_length_into_fabs_scalar -func.func @convert_length_into_fabs_scalar(%arg0 : f32) -> f32 { - //CHECK: spirv.GL.FAbs {{%.*}} : f32 - //CHECK-NOT: spirv.GL.Length - %0 = spirv.GL.Length %arg0 : f32 -> f32 - spirv.ReturnValue %0 : f32 -} - -// CHECK-LABEL: @dont_convert_length_into_fabs_vec -func.func @dont_convert_length_into_fabs_vec(%arg0 : vector<3xf32>) -> f32 { - //CHECK: spirv.GL.Length {{%.*}} : vector<3xf32> -> f32 - //CHECK-NOT: spirv.GL.FAbs - %0 = spirv.GL.Length %arg0 : vector<3xf32> -> f32 - spirv.ReturnValue %0 : f32 -} diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py index 66c4018..3945c99 100644 --- a/mlir/test/python/dialects/gpu/dialect.py +++ b/mlir/test/python/dialects/gpu/dialect.py @@ -2,7 +2,8 @@ from mlir.ir import * import mlir.ir as ir -import mlir.dialects.gpu as gpu +from mlir.dialects import gpu, func, arith, math +from mlir.extras import types as T import mlir.dialects.gpu.passes from mlir.passmanager import * @@ -157,3 +158,96 @@ def testGPUFuncOp(): # CHECK: %[[VAL_0:.*]] = gpu.global_id x # CHECK: gpu.return # CHECK: } + + +# CHECK-LABEL: testGPULaunchFuncOp +@run +def testGPULaunchFuncOp(): + module = Module.create() + + module.operation.attributes["gpu.container_module"] = UnitAttr.get() + with InsertionPoint(module.body): + gpu_module = gpu.GPUModuleOp("gpu_module") + block = gpu_module.bodyRegion.blocks.append() + + with InsertionPoint(block): + gpu_func = gpu.GPUFuncOp( + FunctionType.get([], []), + "kernel", + body_builder=lambda func: gpu.return_([]), + kernel=True, + ) + + with InsertionPoint(module.body): + host = func.FuncOp(type=FunctionType.get([], []), name="host") + + with InsertionPoint(host.add_entry_block()): + c1 = arith.constant(T.index(), 1) + grid_sizes = (1, 1, 1) + block_sizes = (1, 1, 1) + token = gpu.wait() + token = gpu.launch_func( + async_dependencies=[token], + kernel=[gpu_module.sym_name.value, gpu_func.name.value], + grid_size=grid_sizes, + block_size=block_sizes, + kernel_operands=[], + ) + gpu.wait(async_dependencies=[token]) + func.ReturnOp([]) + + print(module) + + # CHECK-LABEL: gpu.module @gpu_module { + # CHECK: gpu.func @kernel() kernel { + # CHECK: gpu.return + # CHECK: } + # CHECK: } + + # CHECK-LABEL: func.func @host() { + # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index + # CHECK: %[[WAIT_0:.*]] = gpu.wait async + # CHECK: %[[CONSTANT_1:.*]] = arith.constant 1 : index + # CHECK: %[[CONSTANT_2:.*]] = arith.constant 1 : index + # CHECK: %[[CONSTANT_3:.*]] = arith.constant 1 : index + # CHECK: %[[CONSTANT_4:.*]] = arith.constant 1 : index + # CHECK: %[[CONSTANT_5:.*]] = arith.constant 1 : index + # CHECK: %[[CONSTANT_6:.*]] = arith.constant 1 : index + # CHECK: %[[LAUNCH_FUNC_0:.*]] = gpu.launch_func async {{\[}}%[[WAIT_0]]] @gpu_module::@kernel blocks in (%[[CONSTANT_1]], %[[CONSTANT_2]], %[[CONSTANT_3]]) threads in (%[[CONSTANT_4]], %[[CONSTANT_5]], %[[CONSTANT_6]]) + # CHECK: %[[WAIT_1:.*]] = gpu.wait async {{\[}}%[[LAUNCH_FUNC_0]]] + # CHECK: return + # CHECK: } + + +# CHECK-LABEL: testGPULaunchOp +@run +def testGPULaunchOp(): + module = Module.create() + + with InsertionPoint(module.body): + host = func.FuncOp(type=FunctionType.get([T.f32()], []), name="gpu_printf") + + entry_block = host.add_entry_block() + with InsertionPoint(entry_block): + c1 = arith.constant(T.index(), 1) + grid_sizes = (c1, c1, c1) + block_sizes = (c1, c1, c1) + + launch = gpu.launch(grid_sizes, block_sizes) + + op = launch(lambda *args: gpu.printf("%f", args[0])) + + with InsertionPoint(entry_block): + func.ReturnOp([]) + + print(module) + + # CHECK-LABEL: func.func @gpu_printf( + # CHECK-SAME: %[[ARG0:.*]]: f32) { + # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index + # CHECK: gpu.launch blocks(%[[VAL_0:.*]], %[[VAL_1:.*]], %[[VAL_2:.*]]) in (%[[VAL_3:.*]] = %[[CONSTANT_0]], %[[VAL_4:.*]] = %[[CONSTANT_0]], %[[VAL_5:.*]] = %[[CONSTANT_0]]) threads(%[[VAL_6:.*]], %[[VAL_7:.*]], %[[VAL_8:.*]]) in (%[[VAL_9:.*]] = %[[CONSTANT_0]], %[[VAL_10:.*]] = %[[CONSTANT_0]], %[[VAL_11:.*]] = %[[CONSTANT_0]]) { + # CHECK: gpu.printf "%[[VAL_12:.*]]", %[[VAL_0]] : index + # CHECK: gpu.terminator + # CHECK: } + # CHECK: return + # CHECK: } diff --git a/mlir/test/python/dialects/llvm.py b/mlir/test/python/dialects/llvm.py index d9ffdeb6..8ea0fdd 100644 --- a/mlir/test/python/dialects/llvm.py +++ b/mlir/test/python/dialects/llvm.py @@ -150,3 +150,22 @@ def testIntrinsics(): result = llvm.intr_memset(alloca, c_0, c_128, False) # CHECK: "llvm.intr.memset"(%[[ALLOCA]], %[[CST0]], %[[CST128]]) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> () print(result) + + +# CHECK-LABEL: testTranslateToLLVMIR +@constructAndPrintInModule +def testTranslateToLLVMIR(): + with Context(), Location.unknown(): + module = Module.parse( + """\ + llvm.func @add(%arg0: i64, %arg1: i64) -> i64 { + %0 = llvm.add %arg0, %arg1 : i64 + llvm.return %0 : i64 + } + """ + ) + # CHECK: define i64 @add(i64 %0, i64 %1) { + # CHECK: %3 = add i64 %0, %1 + # CHECK: ret i64 %3 + # CHECK: } + print(llvm.translate_module_to_llvmir(module.operation)) diff --git a/mlir/unittests/Dialect/OpenACC/CMakeLists.txt b/mlir/unittests/Dialect/OpenACC/CMakeLists.txt index d5f40a4..177c868 100644 --- a/mlir/unittests/Dialect/OpenACC/CMakeLists.txt +++ b/mlir/unittests/Dialect/OpenACC/CMakeLists.txt @@ -1,8 +1,13 @@ add_mlir_unittest(MLIROpenACCTests OpenACCOpsTest.cpp + OpenACCUtilsTest.cpp ) mlir_target_link_libraries(MLIROpenACCTests PRIVATE MLIRIR + MLIRFuncDialect + MLIRMemRefDialect + MLIRArithDialect MLIROpenACCDialect + MLIROpenACCUtils ) diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTest.cpp new file mode 100644 index 0000000..ab817b6 --- /dev/null +++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTest.cpp @@ -0,0 +1,412 @@ +//===- OpenACCUtilsTest.cpp - Unit tests for OpenACC utilities -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/OpenACC/OpenACCUtils.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/OpenACC/OpenACC.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/OwningOpRef.h" +#include "mlir/IR/Value.h" +#include "gtest/gtest.h" + +using namespace mlir; +using namespace mlir::acc; + +//===----------------------------------------------------------------------===// +// Test Fixture +//===----------------------------------------------------------------------===// + +class OpenACCUtilsTest : public ::testing::Test { +protected: + OpenACCUtilsTest() : b(&context), loc(UnknownLoc::get(&context)) { + context.loadDialect<acc::OpenACCDialect, arith::ArithDialect, + memref::MemRefDialect, func::FuncDialect>(); + } + + MLIRContext context; + OpBuilder b; + Location loc; +}; + +//===----------------------------------------------------------------------===// +// getEnclosingComputeOp Tests +//===----------------------------------------------------------------------===// + +TEST_F(OpenACCUtilsTest, getEnclosingComputeOpParallel) { + // Create a parallel op with a region + OwningOpRef<ParallelOp> parallelOp = + ParallelOp::create(b, loc, TypeRange{}, ValueRange{}); + Region ¶llelRegion = parallelOp->getRegion(); + parallelRegion.emplaceBlock(); + + // Test that we can find the parallel op from its region + Operation *enclosingOp = getEnclosingComputeOp(parallelRegion); + EXPECT_EQ(enclosingOp, parallelOp.get()); +} + +TEST_F(OpenACCUtilsTest, getEnclosingComputeOpKernels) { + // Create a kernels op with a region + OwningOpRef<KernelsOp> kernelsOp = + KernelsOp::create(b, loc, TypeRange{}, ValueRange{}); + Region &kernelsRegion = kernelsOp->getRegion(); + kernelsRegion.emplaceBlock(); + + // Test that we can find the kernels op from its region + Operation *enclosingOp = getEnclosingComputeOp(kernelsRegion); + EXPECT_EQ(enclosingOp, kernelsOp.get()); +} + +TEST_F(OpenACCUtilsTest, getEnclosingComputeOpSerial) { + // Create a serial op with a region + OwningOpRef<SerialOp> serialOp = + SerialOp::create(b, loc, TypeRange{}, ValueRange{}); + Region &serialRegion = serialOp->getRegion(); + serialRegion.emplaceBlock(); + + // Test that we can find the serial op from its region + Operation *enclosingOp = getEnclosingComputeOp(serialRegion); + EXPECT_EQ(enclosingOp, serialOp.get()); +} + +TEST_F(OpenACCUtilsTest, getEnclosingComputeOpNested) { + // Create nested ops: parallel containing a loop op + OwningOpRef<ParallelOp> parallelOp = + ParallelOp::create(b, loc, TypeRange{}, ValueRange{}); + Region ¶llelRegion = parallelOp->getRegion(); + Block *parallelBlock = ¶llelRegion.emplaceBlock(); + + OpBuilder::InsertionGuard guard(b); + b.setInsertionPointToStart(parallelBlock); + + // Create a loop op inside the parallel region + OwningOpRef<LoopOp> loopOp = + LoopOp::create(b, loc, TypeRange{}, ValueRange{}); + Region &loopRegion = loopOp->getRegion(); + loopRegion.emplaceBlock(); + + // Test that from the loop region, we find the parallel op (loop is not a + // compute op) + Operation *enclosingOp = getEnclosingComputeOp(loopRegion); + EXPECT_EQ(enclosingOp, parallelOp.get()); +} + +TEST_F(OpenACCUtilsTest, getEnclosingComputeOpNone) { + // Create a module with a region that's not inside a compute construct + OwningOpRef<ModuleOp> moduleOp = ModuleOp::create(loc); + Region &moduleRegion = moduleOp->getBodyRegion(); + + // Test that we get nullptr when there's no enclosing compute op + Operation *enclosingOp = getEnclosingComputeOp(moduleRegion); + EXPECT_EQ(enclosingOp, nullptr); +} + +//===----------------------------------------------------------------------===// +// isOnlyUsedByPrivateClauses Tests +//===----------------------------------------------------------------------===// + +TEST_F(OpenACCUtilsTest, isOnlyUsedByPrivateClausesTrue) { + // Create a value (memref) outside the compute region + auto memrefTy = MemRefType::get({10}, b.getI32Type()); + OwningOpRef<memref::AllocaOp> allocOp = + memref::AllocaOp::create(b, loc, memrefTy); + TypedValue<PointerLikeType> varPtr = + cast<TypedValue<PointerLikeType>>(allocOp->getResult()); + + // Create a parallel op with a region + OwningOpRef<ParallelOp> parallelOp = + ParallelOp::create(b, loc, TypeRange{}, ValueRange{}); + Region ¶llelRegion = parallelOp->getRegion(); + Block *parallelBlock = ¶llelRegion.emplaceBlock(); + + OpBuilder::InsertionGuard guard(b); + b.setInsertionPointToStart(parallelBlock); + + // Create a private op using the value + OwningOpRef<PrivateOp> privateOp = PrivateOp::create( + b, loc, varPtr, /*structured=*/true, /*implicit=*/false); + + // Test that the value is only used by private clauses + EXPECT_TRUE(isOnlyUsedByPrivateClauses(varPtr, parallelRegion)); +} + +TEST_F(OpenACCUtilsTest, isOnlyUsedByPrivateClausesFalse) { + // Create a value (memref) outside the compute region + auto memrefTy = MemRefType::get({10}, b.getI32Type()); + OwningOpRef<memref::AllocaOp> allocOp = + memref::AllocaOp::create(b, loc, memrefTy); + TypedValue<PointerLikeType> varPtr = + cast<TypedValue<PointerLikeType>>(allocOp->getResult()); + + // Create a parallel op with a region + OwningOpRef<ParallelOp> parallelOp = + ParallelOp::create(b, loc, TypeRange{}, ValueRange{}); + Region ¶llelRegion = parallelOp->getRegion(); + Block *parallelBlock = ¶llelRegion.emplaceBlock(); + + OpBuilder::InsertionGuard guard(b); + b.setInsertionPointToStart(parallelBlock); + + // Create a private op using the value + OwningOpRef<PrivateOp> privateOp = PrivateOp::create( + b, loc, varPtr, /*structured=*/true, /*implicit=*/false); + + // Also use the value in a function call (escape) + OwningOpRef<func::CallOp> callOp = func::CallOp::create( + b, loc, "some_func", TypeRange{}, ValueRange{varPtr}); + + // Test that the value is NOT only used by private clauses (it escapes via + // call) + EXPECT_FALSE(isOnlyUsedByPrivateClauses(varPtr, parallelRegion)); +} + +TEST_F(OpenACCUtilsTest, isOnlyUsedByPrivateClausesMultiple) { + // Create a value (memref) outside the compute region + auto memrefTy = MemRefType::get({10}, b.getI32Type()); + OwningOpRef<memref::AllocaOp> allocOp = + memref::AllocaOp::create(b, loc, memrefTy); + TypedValue<PointerLikeType> varPtr = + cast<TypedValue<PointerLikeType>>(allocOp->getResult()); + + // Create a parallel op with a region + OwningOpRef<ParallelOp> parallelOp = + ParallelOp::create(b, loc, TypeRange{}, ValueRange{}); + Region ¶llelRegion = parallelOp->getRegion(); + Block *parallelBlock = ¶llelRegion.emplaceBlock(); + + OpBuilder::InsertionGuard guard(b); + b.setInsertionPointToStart(parallelBlock); + + // Create multiple private ops using the value + OwningOpRef<PrivateOp> privateOp1 = PrivateOp::create( + b, loc, varPtr, /*structured=*/true, /*implicit=*/false); + OwningOpRef<PrivateOp> privateOp2 = PrivateOp::create( + b, loc, varPtr, /*structured=*/true, /*implicit=*/false); + + // Test that the value is only used by private clauses even with multiple uses + EXPECT_TRUE(isOnlyUsedByPrivateClauses(varPtr, parallelRegion)); +} + +//===----------------------------------------------------------------------===// +// isOnlyUsedByReductionClauses Tests +//===----------------------------------------------------------------------===// + +TEST_F(OpenACCUtilsTest, isOnlyUsedByReductionClausesTrue) { + // Create a value (memref) outside the compute region + auto memrefTy = MemRefType::get({10}, b.getI32Type()); + OwningOpRef<memref::AllocaOp> allocOp = + memref::AllocaOp::create(b, loc, memrefTy); + TypedValue<PointerLikeType> varPtr = + cast<TypedValue<PointerLikeType>>(allocOp->getResult()); + + // Create a parallel op with a region + OwningOpRef<ParallelOp> parallelOp = + ParallelOp::create(b, loc, TypeRange{}, ValueRange{}); + Region ¶llelRegion = parallelOp->getRegion(); + Block *parallelBlock = ¶llelRegion.emplaceBlock(); + + OpBuilder::InsertionGuard guard(b); + b.setInsertionPointToStart(parallelBlock); + + // Create a reduction op using the value + OwningOpRef<ReductionOp> reductionOp = ReductionOp::create( + b, loc, varPtr, /*structured=*/true, /*implicit=*/false); + + // Test that the value is only used by reduction clauses + EXPECT_TRUE(isOnlyUsedByReductionClauses(varPtr, parallelRegion)); +} + +TEST_F(OpenACCUtilsTest, isOnlyUsedByReductionClausesFalse) { + // Create a value (memref) outside the compute region + auto memrefTy = MemRefType::get({10}, b.getI32Type()); + OwningOpRef<memref::AllocaOp> allocOp = + memref::AllocaOp::create(b, loc, memrefTy); + TypedValue<PointerLikeType> varPtr = + cast<TypedValue<PointerLikeType>>(allocOp->getResult()); + + // Create a parallel op with a region + OwningOpRef<ParallelOp> parallelOp = + ParallelOp::create(b, loc, TypeRange{}, ValueRange{}); + Region ¶llelRegion = parallelOp->getRegion(); + Block *parallelBlock = ¶llelRegion.emplaceBlock(); + + OpBuilder::InsertionGuard guard(b); + b.setInsertionPointToStart(parallelBlock); + + // Create a reduction op using the value + OwningOpRef<ReductionOp> reductionOp = ReductionOp::create( + b, loc, varPtr, /*structured=*/true, /*implicit=*/false); + + // Also use the value in a function call (escape) + OwningOpRef<func::CallOp> callOp = func::CallOp::create( + b, loc, "some_func", TypeRange{}, ValueRange{varPtr}); + + // Test that the value is NOT only used by reduction clauses (it escapes via + // call) + EXPECT_FALSE(isOnlyUsedByReductionClauses(varPtr, parallelRegion)); +} + +TEST_F(OpenACCUtilsTest, isOnlyUsedByReductionClausesMultiple) { + // Create a value (memref) outside the compute region + auto memrefTy = MemRefType::get({10}, b.getI32Type()); + OwningOpRef<memref::AllocaOp> allocOp = + memref::AllocaOp::create(b, loc, memrefTy); + TypedValue<PointerLikeType> varPtr = + cast<TypedValue<PointerLikeType>>(allocOp->getResult()); + + // Create a parallel op with a region + OwningOpRef<ParallelOp> parallelOp = + ParallelOp::create(b, loc, TypeRange{}, ValueRange{}); + Region ¶llelRegion = parallelOp->getRegion(); + Block *parallelBlock = ¶llelRegion.emplaceBlock(); + + OpBuilder::InsertionGuard guard(b); + b.setInsertionPointToStart(parallelBlock); + + // Create multiple reduction ops using the value + OwningOpRef<ReductionOp> reductionOp1 = ReductionOp::create( + b, loc, varPtr, /*structured=*/true, /*implicit=*/false); + OwningOpRef<ReductionOp> reductionOp2 = ReductionOp::create( + b, loc, varPtr, /*structured=*/true, /*implicit=*/false); + + // Test that the value is only used by reduction clauses even with multiple + // uses + EXPECT_TRUE(isOnlyUsedByReductionClauses(varPtr, parallelRegion)); +} + +//===----------------------------------------------------------------------===// +// getDefaultAttr Tests +//===----------------------------------------------------------------------===// + +TEST_F(OpenACCUtilsTest, getDefaultAttrOnParallel) { + // Create a parallel op with a default attribute + OwningOpRef<ParallelOp> parallelOp = + ParallelOp::create(b, loc, TypeRange{}, ValueRange{}); + parallelOp->setDefaultAttr(ClauseDefaultValue::None); + + // Test that we can retrieve the default attribute + std::optional<ClauseDefaultValue> defaultAttr = + getDefaultAttr(parallelOp.get()); + EXPECT_TRUE(defaultAttr.has_value()); + EXPECT_EQ(defaultAttr.value(), ClauseDefaultValue::None); +} + +TEST_F(OpenACCUtilsTest, getDefaultAttrOnKernels) { + // Create a kernels op with a default attribute + OwningOpRef<KernelsOp> kernelsOp = + KernelsOp::create(b, loc, TypeRange{}, ValueRange{}); + kernelsOp->setDefaultAttr(ClauseDefaultValue::Present); + + // Test that we can retrieve the default attribute + std::optional<ClauseDefaultValue> defaultAttr = + getDefaultAttr(kernelsOp.get()); + EXPECT_TRUE(defaultAttr.has_value()); + EXPECT_EQ(defaultAttr.value(), ClauseDefaultValue::Present); +} + +TEST_F(OpenACCUtilsTest, getDefaultAttrOnSerial) { + // Create a serial op with a default attribute + OwningOpRef<SerialOp> serialOp = + SerialOp::create(b, loc, TypeRange{}, ValueRange{}); + serialOp->setDefaultAttr(ClauseDefaultValue::None); + + // Test that we can retrieve the default attribute + std::optional<ClauseDefaultValue> defaultAttr = + getDefaultAttr(serialOp.get()); + EXPECT_TRUE(defaultAttr.has_value()); + EXPECT_EQ(defaultAttr.value(), ClauseDefaultValue::None); +} + +TEST_F(OpenACCUtilsTest, getDefaultAttrOnData) { + // Create a data op with a default attribute + OwningOpRef<DataOp> dataOp = + DataOp::create(b, loc, TypeRange{}, ValueRange{}); + dataOp->setDefaultAttr(ClauseDefaultValue::Present); + + // Test that we can retrieve the default attribute + std::optional<ClauseDefaultValue> defaultAttr = getDefaultAttr(dataOp.get()); + EXPECT_TRUE(defaultAttr.has_value()); + EXPECT_EQ(defaultAttr.value(), ClauseDefaultValue::Present); +} + +TEST_F(OpenACCUtilsTest, getDefaultAttrNone) { + // Create a parallel op without setting a default attribute + OwningOpRef<ParallelOp> parallelOp = + ParallelOp::create(b, loc, TypeRange{}, ValueRange{}); + // Do not set default attribute + + // Test that we get std::nullopt when there's no default attribute + std::optional<ClauseDefaultValue> defaultAttr = + getDefaultAttr(parallelOp.get()); + EXPECT_FALSE(defaultAttr.has_value()); +} + +TEST_F(OpenACCUtilsTest, getDefaultAttrNearest) { + // Create a data op with a default attribute + OwningOpRef<DataOp> dataOp = + DataOp::create(b, loc, TypeRange{}, ValueRange{}); + dataOp->setDefaultAttr(ClauseDefaultValue::Present); + + Region &dataRegion = dataOp->getRegion(); + Block *dataBlock = &dataRegion.emplaceBlock(); + + OpBuilder::InsertionGuard guard(b); + b.setInsertionPointToStart(dataBlock); + + // Create a parallel op inside the data region with NO default attribute + OwningOpRef<ParallelOp> parallelOp = + ParallelOp::create(b, loc, TypeRange{}, ValueRange{}); + // Do not set default attribute on parallel op + + Region ¶llelRegion = parallelOp->getRegion(); + Block *parallelBlock = ¶llelRegion.emplaceBlock(); + + b.setInsertionPointToStart(parallelBlock); + + // Create a loop op inside the parallel region + OwningOpRef<LoopOp> loopOp = + LoopOp::create(b, loc, TypeRange{}, ValueRange{}); + + // Test that from the loop op, we find the nearest default attribute (from + // data op) + std::optional<ClauseDefaultValue> defaultAttr = getDefaultAttr(loopOp.get()); + EXPECT_TRUE(defaultAttr.has_value()); + EXPECT_EQ(defaultAttr.value(), ClauseDefaultValue::Present); +} + +//===----------------------------------------------------------------------===// +// getTypeCategory Tests +//===----------------------------------------------------------------------===// + +TEST_F(OpenACCUtilsTest, getTypeCategoryScalar) { + // Create a scalar memref (no dimensions) + auto scalarMemrefTy = MemRefType::get({}, b.getI32Type()); + OwningOpRef<memref::AllocaOp> allocOp = + memref::AllocaOp::create(b, loc, scalarMemrefTy); + Value varPtr = allocOp->getResult(); + + // Test that a scalar memref returns scalar category + VariableTypeCategory category = getTypeCategory(varPtr); + EXPECT_EQ(category, VariableTypeCategory::scalar); +} + +TEST_F(OpenACCUtilsTest, getTypeCategoryArray) { + // Create an array memref (with dimensions) + auto arrayMemrefTy = MemRefType::get({10}, b.getI32Type()); + OwningOpRef<memref::AllocaOp> allocOp = + memref::AllocaOp::create(b, loc, arrayMemrefTy); + Value varPtr = allocOp->getResult(); + + // Test that an array memref returns array category + VariableTypeCategory category = getTypeCategory(varPtr); + EXPECT_EQ(category, VariableTypeCategory::array); +} diff --git a/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member.cpp b/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member.cpp new file mode 100644 index 0000000..6fef34f --- /dev/null +++ b/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member.cpp @@ -0,0 +1,34 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +#include <omp.h> +#include <stdio.h> + +int x = 0; +int *y = &x; +int z = 0; + +struct ST { + int n = 111; + int *a = &x; + int *&b = y; + int c = 0; + int &d = z; + int m = 0; + + void f7() { +#pragma omp target data map(to : c) + { + void *mapped_ptr = omp_get_mapped_ptr(&c, omp_get_default_device()); + printf("%d\n", mapped_ptr != NULL); // CHECK: 1 +#pragma omp target data use_device_addr(c) + { + printf("%d\n", &c == mapped_ptr); // CHECK: 1 + } + } + } +}; + +int main() { + ST s; + s.f7(); +} diff --git a/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_ref.cpp b/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_ref.cpp new file mode 100644 index 0000000..8ca02dd --- /dev/null +++ b/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_ref.cpp @@ -0,0 +1,34 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +#include <omp.h> +#include <stdio.h> + +int x = 0; +int *y = &x; +int z = 0; + +struct ST { + int n = 111; + int *a = &x; + int *&b = y; + int c = 0; + int &d = z; + int m = 0; + + void f8() { +#pragma omp target enter data map(to : d) + { + void *mapped_ptr = omp_get_mapped_ptr(&d, omp_get_default_device()); + printf("%d\n", mapped_ptr != NULL); // CHECK: 1 +#pragma omp target data use_device_addr(d) + { + printf("%d\n", &d == mapped_ptr); // CHECK: 1 + } + } + } +}; + +int main() { + ST s; + s.f8(); +} diff --git a/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_ref_with_map.cpp b/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_ref_with_map.cpp new file mode 100644 index 0000000..5e8769e --- /dev/null +++ b/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_ref_with_map.cpp @@ -0,0 +1,49 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +#include <omp.h> +#include <stdio.h> + +int x = 0; +int *y = &x; +int z = 0; + +struct ST { + int n = 111; + int *a = &x; + int *&b = y; + int c = 0; + int &d = z; + int m = 0; + + void f6() { + uintptr_t offset = (uintptr_t)&d - n; +#pragma omp target data map(to : m, d) + { + void *mapped_ptr = omp_get_mapped_ptr(&d, omp_get_default_device()); + printf("%d\n", mapped_ptr != NULL); // CHECK: 1 +#pragma omp target data map(m, d) use_device_addr(d) + { + // FIXME: Clang is mapping class member references using: + // &this[0], &ref_ptee(this[0].d), 4, PTR_AND_OBJ + // but a load from `this[0]` cannot be used to compute the offset + // in the runtime, because for example in this case, it would mean + // that the base address of the pointee is a load from `n`, i.e. 111. + // clang should be emitting the following instead: + // &ref_ptr(this[0].d), &ref_ptee(this[0].d), 4, PTR_AND_OBJ + // And eventually, the following that's compatible with the + // ref/attach modifiers: + // &ref_ptee(this[0].[d])), &ref_ptee(this[0].d), TO | FROM + // &ref_ptr(this[0].d), &ref_ptee(this[0].d), 4, ATTACH + // EXPECTED: 1 0 + // CHECK: 0 1 + printf("%d %d\n", &d == mapped_ptr, + (uintptr_t)&d == (uintptr_t)mapped_ptr - offset); + } + } + } +}; + +int main() { + ST s; + s.f6(); +} diff --git a/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_with_map.cpp b/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_with_map.cpp new file mode 100644 index 0000000..f5db4ec --- /dev/null +++ b/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_with_map.cpp @@ -0,0 +1,43 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +#include <omp.h> +#include <stdio.h> + +int x = 0; +int *y = &x; +int z = 0; + +struct ST { + int n = 111; + int *a = &x; + int *&b = y; + int c = 0; + int &d = z; + int m = 0; + + void f5() { + uintptr_t offset = (uintptr_t)&c - (uintptr_t)this; +#pragma omp target data map(to : m, c) + { + void *mapped_ptr = omp_get_mapped_ptr(&c, omp_get_default_device()); + printf("%d\n", mapped_ptr != NULL); // CHECK: 1 +#pragma omp target data map(m, c) use_device_addr(c) + { + // FIXME: RT is currently doing the translation for "&this[0]" instead + // of &this->c, for a map like: + // this, &this->c, ..., RETURN_PARAM + // We either need to fix RT, or emit a separate entry for such + // use_device_addr, even if there is a matching map entry already. + // EXPECTED: 1 0 + // CHECK: 0 1 + printf("%d %d\n", &c == mapped_ptr, + (uintptr_t)&c == (uintptr_t)mapped_ptr - offset); + } + } + } +}; + +int main() { + ST s; + s.f5(); +} diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member.cpp new file mode 100644 index 0000000..b0253cd --- /dev/null +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member.cpp @@ -0,0 +1,34 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +#include <omp.h> +#include <stdio.h> + +int x = 0; +int *y = &x; +int z = 0; + +struct ST { + int n = 111; + int *a = &x; + int *&b = y; + int c = 0; + int &d = z; + int m = 0; + + void f3() { +#pragma omp target data map(to : a[0]) + { + void *mapped_ptr = omp_get_mapped_ptr(a, omp_get_default_device()); + printf("%d\n", mapped_ptr != NULL); // CHECK: 1 +#pragma omp target data use_device_ptr(a) + { + printf("%d\n", a == mapped_ptr); // CHECK: 1 + } + } + } +}; + +int main() { + ST s; + s.f3(); +} diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref.cpp new file mode 100644 index 0000000..4de3448 --- /dev/null +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref.cpp @@ -0,0 +1,34 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +#include <omp.h> +#include <stdio.h> + +int x = 0; +int *y = &x; +int z = 0; + +struct ST { + int n = 111; + int *a = &x; + int *&b = y; + int c = 0; + int &d = z; + int m = 0; + + void f4() { +#pragma omp target data map(to : b[0]) + { + void *mapped_ptr = omp_get_mapped_ptr(b, omp_get_default_device()); + printf("%d\n", mapped_ptr != NULL); // CHECK: 1 +#pragma omp target data use_device_ptr(b) + { + printf("%d\n", b == mapped_ptr); // CHECK: 1 + } + } + } +}; + +int main() { + ST s; + s.f4(); +} diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_with_map.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_with_map.cpp new file mode 100644 index 0000000..27fda74 --- /dev/null +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_with_map.cpp @@ -0,0 +1,36 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +// XFAIL: * + +#include <omp.h> +#include <stdio.h> + +int x = 0; +int *y = &x; +int z = 0; + +struct ST { + int n = 111; + int *a = &x; + int *&b = y; + int c = 0; + int &d = z; + int m = 0; + + void f2() { +#pragma omp target data map(to : b[0]) + { + void *mapped_ptr = omp_get_mapped_ptr(b, omp_get_default_device()); + printf("%d\n", mapped_ptr != NULL); // CHECK: 1 +#pragma omp target data map(b[0], m) use_device_ptr(b) + { + printf("%d\n", b == mapped_ptr); // CHECK: 1 + } + } + } +}; + +int main() { + ST s; + s.f2(); +} diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_with_map.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_with_map.cpp new file mode 100644 index 0000000..38a3696 --- /dev/null +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_with_map.cpp @@ -0,0 +1,36 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +// XFAIL: * + +#include <omp.h> +#include <stdio.h> + +int x = 0; +int *y = &x; +int z = 0; + +struct ST { + int n = 111; + int *a = &x; + int *&b = y; + int c = 0; + int &d = z; + int m = 0; + + void f1() { +#pragma omp target data map(to : a[0]) + { + void *mapped_ptr = omp_get_mapped_ptr(a, omp_get_default_device()); + printf("%d\n", mapped_ptr != NULL); // CHECK: 1 +#pragma omp target data map(a[0], m) use_device_ptr(a) + { + printf("%d\n", a == mapped_ptr); // CHECK: 1 + } + } + } +}; + +int main() { + ST s; + s.f1(); +} diff --git a/polly/test/Unit/lit.cfg b/polly/test/Unit/lit.cfg index 6c450fb..21d7bc4 100644 --- a/polly/test/Unit/lit.cfg +++ b/polly/test/Unit/lit.cfg @@ -50,7 +50,7 @@ for var in [ if platform.system() == 'Darwin': shlibpath_var = 'DYLD_LIBRARY_PATH' -elif platform.system() == 'Windows': +elif platform.system() == 'Windows' or sys.platform == "cygwin": shlibpath_var = 'PATH' else: shlibpath_var = 'LD_LIBRARY_PATH' diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 5357a6a..b415a36 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -5672,6 +5672,57 @@ cc_binary( ) cc_binary( + name = "llvm-sim", + testonly = True, + srcs = glob([ + "tools/llvm-sim/*.cpp", + ]), + copts = llvm_copts, + stamp = 0, + deps = [ + ":Analysis", + ":Core", + ":IRReader", + ":Support", + ":config", + ], +) + +cc_binary( + name = "llvm-ir2vec", + testonly = True, + srcs = glob([ + "tools/llvm-ir2vec/*.cpp", + ]), + copts = llvm_copts, + stamp = 0, + deps = [ + ":Analysis", + ":Core", + ":IRReader", + ":Support", + ":config", + ], +) + +cc_binary( + name = "llvm-ctxprof-util", + testonly = True, + srcs = glob([ + "tools/llvm-ctxprof-util/*.cpp", + ]), + copts = llvm_copts, + stamp = 0, + deps = [ + ":Core", + ":Object", + ":ProfileData", + ":Support", + ":config", + ], +) + +cc_binary( name = "obj2yaml", testonly = True, srcs = glob([ diff --git a/utils/bazel/llvm-project-overlay/llvm/config.bzl b/utils/bazel/llvm-project-overlay/llvm/config.bzl index 3e9c032..fbd6f6b 100644 --- a/utils/bazel/llvm-project-overlay/llvm/config.bzl +++ b/utils/bazel/llvm-project-overlay/llvm/config.bzl @@ -105,6 +105,7 @@ llvm_config_defines = os_defines + builtin_thread_pointer + select({ "@bazel_tools//src/conditions:darwin_x86_64": native_arch_defines("X86", "x86_64-unknown-darwin"), "@bazel_tools//src/conditions:linux_aarch64": native_arch_defines("AArch64", "aarch64-unknown-linux-gnu"), "@bazel_tools//src/conditions:linux_ppc64le": native_arch_defines("PowerPC", "powerpc64le-unknown-linux-gnu"), + "@bazel_tools//src/conditions:linux_riscv64": native_arch_defines("RISCV", "riscv64-unknown-linux-gnu"), "@bazel_tools//src/conditions:linux_s390x": native_arch_defines("SystemZ", "systemz-unknown-linux_gnu"), "//conditions:default": native_arch_defines("X86", "x86_64-unknown-linux-gnu"), }) + [ |