diff options
-rw-r--r-- | clang/docs/LanguageExtensions.rst | 5
-rw-r--r-- | clang/include/clang/Basic/LangOptions.h | 4
-rw-r--r-- | clang/include/clang/Driver/Options.td | 2
-rw-r--r-- | clang/include/clang/Lex/LiteralConverter.h | 13
-rw-r--r-- | clang/include/clang/Lex/LiteralSupport.h | 2
-rw-r--r-- | clang/lib/Driver/ToolChains/Clang.cpp | 8
-rw-r--r-- | clang/lib/Frontend/InitPreprocessor.cpp | 6
-rw-r--r-- | clang/lib/Lex/LiteralConverter.cpp | 66
-rw-r--r-- | clang/lib/Lex/LiteralSupport.cpp | 2
-rw-r--r-- | llvm/include/llvm/TargetParser/Triple.h | 4
-rw-r--r-- | llvm/lib/TargetParser/Triple.cpp | 4
11 files changed, 50 insertions, 66 deletions
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index be612d4..e94b38e 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -415,8 +415,9 @@ Builtin Macros ``__clang_literal_encoding__`` Defined to a narrow string literal that represents the current encoding of - narrow string literals, e.g., ``"hello"``. This macro typically expands to - the text encoding specified by -fexec-charset if specified, or the system charset. + narrow string literals, e.g., ``"hello"``. This macro expands to the text + encoding specified by ``-fexec-charset`` if any, or a system-specific default + otherwise: ``"IBM-1047"`` on z/OS and ``"UTF-8"`` on all other systems. ``__clang_wide_literal_encoding__`` Defined to a narrow string literal that represents the current encoding of diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 559a4be..ecf92ce 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -633,8 +633,8 @@ public: bool AtomicFineGrainedMemory = false; bool AtomicIgnoreDenormalMode = false; - /// Name of the exec charset to convert the internal charset to. - std::string ExecCharset; + /// Name of the execution encoding to convert the internal encoding to. + std::string ExecEncoding; LangOptions(); diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 2da791b..f7aa659 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -7251,7 +7251,7 @@ def fexec_charset : Separate<["-"], "fexec-charset">, MetaVarName<"<charset>">, HelpText<"Set the execution <charset> for string and character literals. 
" "Supported character encodings include ISO8859-1, UTF-8, IBM-1047 " "and those supported by the host icu or iconv library.">, - MarshallingInfoString<LangOpts<"ExecCharset">>; + MarshallingInfoString<LangOpts<"ExecEncoding">>; def target_cpu : Separate<["-"], "target-cpu">, HelpText<"Target a specific cpu type">, MarshallingInfoString<TargetOpts<"CPU">>; diff --git a/clang/include/clang/Lex/LiteralConverter.h b/clang/include/clang/Lex/LiteralConverter.h index 999b2c1..ee489bf 100644 --- a/clang/include/clang/Lex/LiteralConverter.h +++ b/clang/include/clang/Lex/LiteralConverter.h @@ -16,18 +16,17 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/TextEncoding.h" -enum ConversionAction { NoConversion, ToSystemCharset, ToExecCharset }; +enum ConversionAction { NoConversion, ToSystemEncoding, ToExecEncoding }; class LiteralConverter { - llvm::StringRef InternalCharset; - llvm::StringRef SystemCharset; - llvm::StringRef ExecCharset; - llvm::StringMap<llvm::TextEncodingConverter> TextEncodingConverters; + llvm::StringRef InternalEncoding; + llvm::StringRef SystemEncoding; + llvm::StringRef ExecEncoding; + llvm::TextEncodingConverter *ToSystemEncodingConverter; + llvm::TextEncodingConverter *ToExecEncodingConverter; public: - llvm::TextEncodingConverter *getConverter(const char *Codepage); llvm::TextEncodingConverter *getConverter(ConversionAction Action); - llvm::TextEncodingConverter *createAndInsertCharConverter(const char *To); void setConvertersFromOptions(const clang::LangOptions &Opts, const clang::TargetInfo &TInfo, clang::DiagnosticsEngine &Diags); diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h index eaa2016..af02969 100644 --- a/clang/include/clang/Lex/LiteralSupport.h +++ b/clang/include/clang/Lex/LiteralSupport.h @@ -251,7 +251,7 @@ public: StringLiteralParser( ArrayRef<Token> StringToks, Preprocessor &PP, StringLiteralEvalMethod StringMethod = StringLiteralEvalMethod::Evaluated, - ConversionAction 
Action = ToExecCharset); + ConversionAction Action = ToExecEncoding); StringLiteralParser(ArrayRef<Token> StringToks, const SourceManager &sm, const LangOptions &features, const TargetInfo &target, DiagnosticsEngine *diags = nullptr) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index cc20aad..aaab97d 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7592,9 +7592,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // Set the default fexec-charset as the system charset. CmdArgs.push_back("-fexec-charset"); - CmdArgs.push_back(Args.MakeArgString(Triple.getSystemCharset())); - if (Arg *execCharset = Args.getLastArg(options::OPT_fexec_charset_EQ)) { - StringRef value = execCharset->getValue(); + CmdArgs.push_back(Args.MakeArgString(Triple.getDefaultTextEncoding())); + if (Arg *execEncoding = Args.getLastArg(options::OPT_fexec_charset_EQ)) { + StringRef value = execEncoding->getValue(); llvm::ErrorOr<llvm::TextEncodingConverter> ErrorOrConverter = llvm::TextEncodingConverter::create("UTF-8", value.data()); if (ErrorOrConverter) { @@ -7602,7 +7602,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString(value)); } else { D.Diag(diag::err_drv_invalid_value) - << execCharset->getAsString(Args) << value; + << execEncoding->getAsString(Args) << value; } } diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 39b684a..54a0348 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -1060,11 +1060,11 @@ static void InitializePredefinedMacros(const TargetInfo &TI, // Macros to help identify the narrow and wide character sets. This is set // to fexec-charset. If fexec-charset is not specified, the default is the // system charset. 
- if (!LangOpts.ExecCharset.empty()) - Builder.defineMacro("__clang_literal_encoding__", LangOpts.ExecCharset); + if (!LangOpts.ExecEncoding.empty()) + Builder.defineMacro("__clang_literal_encoding__", LangOpts.ExecEncoding); else Builder.defineMacro("__clang_literal_encoding__", - TI.getTriple().getSystemCharset()); + TI.getTriple().getDefaultTextEncoding()); if (TI.getTypeWidth(TI.getWCharType()) >= 32) { // FIXME: 32-bit wchar_t signals UTF-32. This may change // if -fwide-exec-charset= is ever supported. diff --git a/clang/lib/Lex/LiteralConverter.cpp b/clang/lib/Lex/LiteralConverter.cpp index b00f44a..e9f8981 100644 --- a/clang/lib/Lex/LiteralConverter.cpp +++ b/clang/lib/Lex/LiteralConverter.cpp @@ -12,58 +12,42 @@ using namespace llvm; llvm::TextEncodingConverter * -LiteralConverter::getConverter(const char *Codepage) { - auto Iter = TextEncodingConverters.find(Codepage); - if (Iter != TextEncodingConverters.end()) - return &Iter->second; - return nullptr; -} - -llvm::TextEncodingConverter * LiteralConverter::getConverter(ConversionAction Action) { - StringRef CodePage; - if (Action == ToSystemCharset) - CodePage = SystemCharset; - else if (Action == ToExecCharset) - CodePage = ExecCharset; + if (Action == ToSystemEncoding) + return ToSystemEncodingConverter; + else if (Action == ToExecEncoding) + return ToExecEncodingConverter; else - CodePage = InternalCharset; - return getConverter(CodePage.data()); -} - -llvm::TextEncodingConverter * -LiteralConverter::createAndInsertCharConverter(const char *To) { - const char *From = InternalCharset.data(); - llvm::TextEncodingConverter *Converter = getConverter(To); - if (Converter) - return Converter; - - ErrorOr<TextEncodingConverter> ErrorOrConverter = - llvm::TextEncodingConverter::create(From, To); - if (!ErrorOrConverter) return nullptr; - TextEncodingConverters.insert_or_assign(StringRef(To), - std::move(*ErrorOrConverter)); - return getConverter(To); } void LiteralConverter::setConvertersFromOptions( const 
clang::LangOptions &Opts, const clang::TargetInfo &TInfo, clang::DiagnosticsEngine &Diags) { using namespace llvm; - SystemCharset = TInfo.getTriple().getSystemCharset(); - InternalCharset = "UTF-8"; - ExecCharset = Opts.ExecCharset.empty() ? InternalCharset : Opts.ExecCharset; - // Create converter between internal and system charset - if (InternalCharset != SystemCharset) - createAndInsertCharConverter(SystemCharset.data()); + InternalEncoding = "UTF-8"; + SystemEncoding = TInfo.getTriple().getDefaultTextEncoding(); + ExecEncoding = + Opts.ExecEncoding.empty() ? InternalEncoding : Opts.ExecEncoding; + // Create converter between internal and system encoding + if (InternalEncoding != SystemEncoding) { + ErrorOr<TextEncodingConverter> ErrorOrConverter = + llvm::TextEncodingConverter::create(InternalEncoding, SystemEncoding); + if (!ErrorOrConverter) + return; + ToSystemEncodingConverter = + new TextEncodingConverter(std::move(*ErrorOrConverter)); + } - // Create converter between internal and exec charset specified + // Create converter between internal and exec encoding specified // in fexec-charset option. 
- if (InternalCharset == ExecCharset) + if (InternalEncoding == ExecEncoding) return; - if (!createAndInsertCharConverter(ExecCharset.data())) { + ErrorOr<TextEncodingConverter> ErrorOrConverter = + llvm::TextEncodingConverter::create(InternalEncoding, ExecEncoding); + if (!ErrorOrConverter) Diags.Report(clang::diag::err_drv_invalid_value) - << "-fexec-charset" << ExecCharset; - } + << "-fexec-charset" << ExecEncoding; + ToExecEncodingConverter = + new TextEncodingConverter(std::move(*ErrorOrConverter)); } diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp index 622d758..6827e32 100644 --- a/clang/lib/Lex/LiteralSupport.cpp +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -1835,7 +1835,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, llvm::TextEncodingConverter *Converter = nullptr; if (!isUTFLiteral(Kind) && LiteralConv) - Converter = LiteralConv->getConverter(ToExecCharset); + Converter = LiteralConv->getConverter(ToExecEncoding); while (begin != end) { // Is this a span of non-escape characters? diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index 059c176..1ec34b9 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -491,8 +491,8 @@ public: /// For example, "fooos1.2.3" would return "1.2.3". StringRef getEnvironmentVersionString() const; - /// getSystemCharset - Get the system charset of the triple. - StringRef getSystemCharset() const; + /// getDefaultTextEncoding - Get the default encoding of the triple. 
+ StringRef getDefaultTextEncoding() const; /// @} /// @name Convenience Predicates diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index 4f55d05..772fdd8 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -1384,8 +1384,8 @@ StringRef Triple::getOSAndEnvironmentName() const { return Tmp.split('-').second; // Strip second component } -// System charset on z/OS is IBM-1047 and UTF-8 otherwise -StringRef Triple::getSystemCharset() const { +// Default encoding on z/OS is IBM-1047 and UTF-8 otherwise +StringRef Triple::getDefaultTextEncoding() const { if (getOS() == llvm::Triple::ZOS) return "IBM-1047"; return "UTF-8"; |