aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/Basic/SourceManager.cpp
diff options
context:
space:
mode:
author: Abhina Sree <Abhina.Sreeskantharajan@ibm.com> 2024-12-11 07:46:51 -0500
committer: GitHub <noreply@github.com> 2024-12-11 07:46:51 -0500
commit: 04379c98638ac3901257b5fa319f9ece828af767 (patch)
tree: ccfc13328102602462bede899c94a6208154c3e2 /clang/lib/Basic/SourceManager.cpp
parent: 0100c631f85480ecdf1b35f2aedbfc0200a81174 (diff)
downloadllvm-04379c98638ac3901257b5fa319f9ece828af767.zip
llvm-04379c98638ac3901257b5fa319f9ece828af767.tar.gz
llvm-04379c98638ac3901257b5fa319f9ece828af767.tar.bz2
[SystemZ][z/OS] Update autoconversion functions to improve support for UTF-8 (#98652)
This fixes the following error when reading source and header files on z/OS: error: source file is not valid UTF-8
Diffstat (limited to 'clang/lib/Basic/SourceManager.cpp')
-rw-r--r-- clang/lib/Basic/SourceManager.cpp 32
1 files changed, 31 insertions, 1 deletions
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index 6e588ce..849c18f 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/AutoConvert.h"
#include "llvm/Support/Capacity.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Endian.h"
@@ -156,8 +157,11 @@ ContentCache::getBufferOrNone(DiagnosticsEngine &Diag, FileManager &FM,
// Unless this is a named pipe (in which case we can handle a mismatch),
// check that the file's size is the same as in the file entry (which may
// have come from a stat cache).
+ // The buffer will always be larger than the file size on z/OS in the presence
+ // of characters outside the base character set.
+ assert(Buffer->getBufferSize() >= (size_t)ContentsEntry->getSize());
if (!ContentsEntry->isNamedPipe() &&
- Buffer->getBufferSize() != (size_t)ContentsEntry->getSize()) {
+ Buffer->getBufferSize() < (size_t)ContentsEntry->getSize()) {
Diag.Report(Loc, diag::err_file_modified) << ContentsEntry->getName();
return std::nullopt;
@@ -583,6 +587,18 @@ SourceManager::getOrCreateFileID(FileEntryRef SourceFile,
FileCharacter);
}
+/// Helper function to determine if an input file requires conversion.
+///
+/// When __MVS__ is defined this queries llvm::needzOSConversion() for
+/// \p Filename; in every other build it unconditionally returns false.
+///
+/// \param Filename Path of the input file to query.
+/// \returns true only if the query succeeded and reported that conversion is
+/// needed; false otherwise (including when the query returned an error).
+bool needConversion(StringRef Filename) {
+#ifdef __MVS__
+  llvm::ErrorOr<bool> NeedConversion =
+      llvm::needzOSConversion(Filename.str().c_str());
+  assert(NeedConversion && "Filename was not found");
+  // The assert above compiles away in NDEBUG builds, so never dereference an
+  // ErrorOr that holds an error; treat a failed query as "no conversion".
+  return NeedConversion && *NeedConversion;
+#else
+  return false;
+#endif
+}
+
/// createFileID - Create a new FileID for the specified ContentCache and
/// include position. This works regardless of whether the ContentCache
/// corresponds to a file or some other input source.
@@ -602,6 +618,20 @@ FileID SourceManager::createFileIDImpl(ContentCache &File, StringRef Filename,
return FileID::get(LoadedID);
}
unsigned FileSize = File.getSize();
+ bool NeedConversion = needConversion(Filename);
+ if (NeedConversion) {
+ // Buffer size may increase due to potential z/OS EBCDIC to UTF-8
+ // conversion.
+ if (std::optional<llvm::MemoryBufferRef> Buffer =
+ File.getBufferOrNone(Diag, getFileManager())) {
+ unsigned BufSize = Buffer->getBufferSize();
+ if (BufSize > FileSize) {
+ if (File.ContentsEntry.has_value())
+ File.ContentsEntry->updateFileEntryBufferSize(BufSize);
+ FileSize = BufSize;
+ }
+ }
+ }
if (!(NextLocalOffset + FileSize + 1 > NextLocalOffset &&
NextLocalOffset + FileSize + 1 <= CurrentLoadedOffset)) {
Diag.Report(IncludePos, diag::err_sloc_space_too_large);