diff options
author | Abhina Sree <Abhina.Sreeskantharajan@ibm.com> | 2024-12-11 07:46:51 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-12-11 07:46:51 -0500 |
commit | 04379c98638ac3901257b5fa319f9ece828af767 (patch) | |
tree | ccfc13328102602462bede899c94a6208154c3e2 /clang/lib/Basic/SourceManager.cpp | |
parent | 0100c631f85480ecdf1b35f2aedbfc0200a81174 (diff) | |
download | llvm-04379c98638ac3901257b5fa319f9ece828af767.zip llvm-04379c98638ac3901257b5fa319f9ece828af767.tar.gz llvm-04379c98638ac3901257b5fa319f9ece828af767.tar.bz2 |
[SystemZ][z/OS] Update autoconversion functions to improve support for UTF-8 (#98652)
This fixes the following error when reading source and header files on
z/OS: error: source file is not valid UTF-8
Diffstat (limited to 'clang/lib/Basic/SourceManager.cpp')
-rw-r--r-- | clang/lib/Basic/SourceManager.cpp | 32 |
1 files changed, 31 insertions, 1 deletions
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index 6e588ce..849c18f 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -24,6 +24,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/AutoConvert.h"
 #include "llvm/Support/Capacity.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Endian.h"
@@ -156,8 +157,11 @@ ContentCache::getBufferOrNone(DiagnosticsEngine &Diag, FileManager &FM,
   // Unless this is a named pipe (in which case we can handle a mismatch),
   // check that the file's size is the same as in the file entry (which may
   // have come from a stat cache).
+  // The buffer will always be larger than the file size on z/OS in the presence
+  // of characters outside the base character set.
+  assert(Buffer->getBufferSize() >= (size_t)ContentsEntry->getSize());
   if (!ContentsEntry->isNamedPipe() &&
-      Buffer->getBufferSize() != (size_t)ContentsEntry->getSize()) {
+      Buffer->getBufferSize() < (size_t)ContentsEntry->getSize()) {
     Diag.Report(Loc, diag::err_file_modified) << ContentsEntry->getName();
 
     return std::nullopt;
@@ -583,6 +587,18 @@ SourceManager::getOrCreateFileID(FileEntryRef SourceFile,
                                      FileCharacter);
 }
 
+/// Helper function to determine if an input file requires conversion
+bool needConversion(StringRef Filename) {
+#ifdef __MVS__
+  llvm::ErrorOr<bool> NeedConversion =
+      llvm::needzOSConversion(Filename.str().c_str());
+  assert(NeedConversion && "Filename was not found");
+  return *NeedConversion;
+#else
+  return false;
+#endif
+}
+
 /// createFileID - Create a new FileID for the specified ContentCache and
 /// include position. This works regardless of whether the ContentCache
 /// corresponds to a file or some other input source.
@@ -602,6 +618,20 @@ FileID SourceManager::createFileIDImpl(ContentCache &File, StringRef Filename,
     return FileID::get(LoadedID);
   }
   unsigned FileSize = File.getSize();
+  bool NeedConversion = needConversion(Filename);
+  if (NeedConversion) {
+    // Buffer size may increase due to potential z/OS EBCDIC to UTF-8
+    // conversion.
+    if (std::optional<llvm::MemoryBufferRef> Buffer =
+            File.getBufferOrNone(Diag, getFileManager())) {
+      unsigned BufSize = Buffer->getBufferSize();
+      if (BufSize > FileSize) {
+        if (File.ContentsEntry.has_value())
+          File.ContentsEntry->updateFileEntryBufferSize(BufSize);
+        FileSize = BufSize;
+      }
+    }
+  }
   if (!(NextLocalOffset + FileSize + 1 > NextLocalOffset &&
         NextLocalOffset + FileSize + 1 <= CurrentLoadedOffset)) {
     Diag.Report(IncludePos, diag::err_sloc_space_too_large);