diff options
author | Simon Tatham <simon.tatham@arm.com> | 2022-05-03 10:33:11 +0100 |
---|---|---|
committer | Simon Tatham <simon.tatham@arm.com> | 2022-05-03 11:57:50 +0100 |
commit | 32814df442690d4673759296d850804773a7ea5b (patch) | |
tree | 9eda48776326782c9bd2daf68faac03289704213 /llvm/lib/Support/CommandLine.cpp | |
parent | 1be024ee450f2d3cb07086f6141d50f291c1910b (diff) | |
download | llvm-32814df442690d4673759296d850804773a7ea5b.zip llvm-32814df442690d4673759296d850804773a7ea5b.tar.gz llvm-32814df442690d4673759296d850804773a7ea5b.tar.bz2 |
[Windows] Fix handling of \" in program name on cmd line.
Bugzilla #47579: if you invoke clang on Windows via a pathname in
which a quoted section closes just after a backslash, e.g.
"C:\Program Files\Whatever\"clang.exe
then cmd.exe and CreateProcess will correctly find the binary, because
when they parse the program name at the start of the command line,
they don't regard the \ before the " as having any kind of escaping
effect. This is different from the behaviour of the Windows standard C
library when it parses the rest of the command line: there, the library
treats that \" as an escaped quote, which does not close the quoted string.
But this discrepancy confuses windows::GetCommandLineArguments, because the
Windows API function GetCommandLineW() will return a command line
containing that \" sequence, and cl::TokenizeWindowsCommandLine will
tokenize the whole string according to the C library's rules. So it
will misidentify where the program name stops and the arguments start.
To fix this, I've introduced a new variant function
cl::TokenizeWindowsCommandLineFull(), intended to be applied to the
string returned from GetCommandLineW(). It parses the first word of
the command line according to CreateProcess's rules, considering \ to
never be an escaping character; thereafter, it switches over to the C
library rules for the rest of the command line.
Reviewed By: hans
Differential Revision: https://reviews.llvm.org/D122914
Diffstat (limited to 'llvm/lib/Support/CommandLine.cpp')
-rw-r--r-- | llvm/lib/Support/CommandLine.cpp | 65 |
1 files changed, 52 insertions, 13 deletions
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index 2f749bf..3e5fff9 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -918,21 +918,34 @@ static size_t parseBackslash(StringRef Src, size_t I, SmallString<128> &Token) { return I - 1; } -// Windows treats whitespace, double quotes, and backslashes specially. +// Windows treats whitespace, double quotes, and backslashes specially, except +// when parsing the first token of a full command line, in which case +// backslashes are not special. static bool isWindowsSpecialChar(char C) { return isWhitespaceOrNull(C) || C == '\\' || C == '\"'; } +static bool isWindowsSpecialCharInCommandName(char C) { + return isWhitespaceOrNull(C) || C == '\"'; +} // Windows tokenization implementation. The implementation is designed to be // inlined and specialized for the two user entry points. -static inline void -tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver, - function_ref<void(StringRef)> AddToken, - bool AlwaysCopy, function_ref<void()> MarkEOL) { +static inline void tokenizeWindowsCommandLineImpl( + StringRef Src, StringSaver &Saver, function_ref<void(StringRef)> AddToken, + bool AlwaysCopy, function_ref<void()> MarkEOL, bool InitialCommandName) { SmallString<128> Token; + // Sometimes, this function will be handling a full command line including an + // executable pathname at the start. In that situation, the initial pathname + // needs different handling from the following arguments, because when + // CreateProcess or cmd.exe scans the pathname, it doesn't treat \ as + // escaping the quote character, whereas when libc scans the rest of the + // command line, it does. + bool CommandName = InitialCommandName; + // Try to do as much work inside the state machine as possible. 
enum { INIT, UNQUOTED, QUOTED } State = INIT; + for (size_t I = 0, E = Src.size(); I < E; ++I) { switch (State) { case INIT: { @@ -947,19 +960,29 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver, if (I >= E) break; size_t Start = I; - while (I < E && !isWindowsSpecialChar(Src[I])) - ++I; + if (CommandName) { + while (I < E && !isWindowsSpecialCharInCommandName(Src[I])) + ++I; + } else { + while (I < E && !isWindowsSpecialChar(Src[I])) + ++I; + } StringRef NormalChars = Src.slice(Start, I); if (I >= E || isWhitespaceOrNull(Src[I])) { // No special characters: slice out the substring and start the next // token. Copy the string if the caller asks us to. AddToken(AlwaysCopy ? Saver.save(NormalChars) : NormalChars); - if (I < E && Src[I] == '\n') + if (I < E && Src[I] == '\n') { MarkEOL(); + CommandName = InitialCommandName; + } else { + CommandName = false; + } } else if (Src[I] == '\"') { Token += NormalChars; State = QUOTED; } else if (Src[I] == '\\') { + assert(!CommandName && "or else we'd have treated it as a normal char"); Token += NormalChars; I = parseBackslash(Src, I, Token); State = UNQUOTED; @@ -976,12 +999,16 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver, // token. AddToken(Saver.save(Token.str())); Token.clear(); - if (Src[I] == '\n') + if (Src[I] == '\n') { + CommandName = InitialCommandName; MarkEOL(); + } else { + CommandName = false; + } State = INIT; } else if (Src[I] == '\"') { State = QUOTED; - } else if (Src[I] == '\\') { + } else if (Src[I] == '\\' && !CommandName) { I = parseBackslash(Src, I, Token); } else { Token.push_back(Src[I]); @@ -999,7 +1026,7 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver, // Otherwise, end the quoted portion and return to the unquoted state. 
State = UNQUOTED; } - } else if (Src[I] == '\\') { + } else if (Src[I] == '\\' && !CommandName) { I = parseBackslash(Src, I, Token); } else { Token.push_back(Src[I]); @@ -1021,7 +1048,7 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver, NewArgv.push_back(nullptr); }; tokenizeWindowsCommandLineImpl(Src, Saver, AddToken, - /*AlwaysCopy=*/true, OnEOL); + /*AlwaysCopy=*/true, OnEOL, false); } void cl::TokenizeWindowsCommandLineNoCopy(StringRef Src, StringSaver &Saver, @@ -1029,7 +1056,19 @@ void cl::TokenizeWindowsCommandLineNoCopy(StringRef Src, StringSaver &Saver, auto AddToken = [&](StringRef Tok) { NewArgv.push_back(Tok); }; auto OnEOL = []() {}; tokenizeWindowsCommandLineImpl(Src, Saver, AddToken, /*AlwaysCopy=*/false, - OnEOL); + OnEOL, false); +} + +void cl::TokenizeWindowsCommandLineFull(StringRef Src, StringSaver &Saver, + SmallVectorImpl<const char *> &NewArgv, + bool MarkEOLs) { + auto AddToken = [&](StringRef Tok) { NewArgv.push_back(Tok.data()); }; + auto OnEOL = [&]() { + if (MarkEOLs) + NewArgv.push_back(nullptr); + }; + tokenizeWindowsCommandLineImpl(Src, Saver, AddToken, + /*AlwaysCopy=*/true, OnEOL, true); } void cl::tokenizeConfigFile(StringRef Source, StringSaver &Saver, |