aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/FileCheck/FileCheck.cpp
diff options
context:
space:
mode:
authorNikita Popov <npopov@redhat.com>2023-11-15 09:34:52 +0100
committerGitHub <noreply@github.com>2023-11-15 09:34:52 +0100
commit261b471015ef674253f03a4ae213919d983e016e (patch)
treeb279c784d710685f78c0efdcaa610311b816d9cd /llvm/lib/FileCheck/FileCheck.cpp
parent9a9933fae23249fbf6cf5b3c090e630f578b7f98 (diff)
downloadllvm-261b471015ef674253f03a4ae213919d983e016e.zip
llvm-261b471015ef674253f03a4ae213919d983e016e.tar.gz
llvm-261b471015ef674253f03a4ae213919d983e016e.tar.bz2
[FileCheck] Don't use regex to find prefixes (#72237)
FileCheck currently compiles a regular expression of the form `Prefix1|Prefix2|...` and uses it to find the next prefix in the input. If we had a fast regex implementation, this would be a useful thing to do, as the regex implementation would be able to match multiple prefixes more efficiently than a naive approach. However, with our actual regex implementation, finding the prefixes basically becomes O(InputLen * RegexLen * LargeConstantFactor), which is a lot worse than a simple string search. Replace the regex with StringRef::find(), and keeping track of the next position of each prefix. There are various ways this could be improved on, but it's already significantly faster that the previous approach. For me, this improves check-llvm time from 138.5s to 132.5s, so by around 4-5%. For vector-interleaved-load-i16-stride-7.ll in particular, test time drops from 5s to 2.5s.
Diffstat (limited to 'llvm/lib/FileCheck/FileCheck.cpp')
-rw-r--r--llvm/lib/FileCheck/FileCheck.cpp102
1 files changed, 62 insertions, 40 deletions
diff --git a/llvm/lib/FileCheck/FileCheck.cpp b/llvm/lib/FileCheck/FileCheck.cpp
index bef6dd6..ac8f52d 100644
--- a/llvm/lib/FileCheck/FileCheck.cpp
+++ b/llvm/lib/FileCheck/FileCheck.cpp
@@ -1634,6 +1634,60 @@ static size_t SkipWord(StringRef Str, size_t Loc) {
return Loc;
}
+static const char *DefaultCheckPrefixes[] = {"CHECK"};
+static const char *DefaultCommentPrefixes[] = {"COM", "RUN"};
+
+static void addDefaultPrefixes(FileCheckRequest &Req) {
+ if (Req.CheckPrefixes.empty()) {
+ for (const char *Prefix : DefaultCheckPrefixes)
+ Req.CheckPrefixes.push_back(Prefix);
+ Req.IsDefaultCheckPrefix = true;
+ }
+ if (Req.CommentPrefixes.empty())
+ for (const char *Prefix : DefaultCommentPrefixes)
+ Req.CommentPrefixes.push_back(Prefix);
+}
+
+struct PrefixMatcher {
+ /// Prefixes and their first occurrence past the current position.
+ SmallVector<std::pair<StringRef, size_t>> Prefixes;
+ StringRef Input;
+
+ PrefixMatcher(ArrayRef<StringRef> CheckPrefixes,
+ ArrayRef<StringRef> CommentPrefixes, StringRef Input)
+ : Input(Input) {
+ for (StringRef Prefix : CheckPrefixes)
+ Prefixes.push_back({Prefix, Input.find(Prefix)});
+ for (StringRef Prefix : CommentPrefixes)
+ Prefixes.push_back({Prefix, Input.find(Prefix)});
+
+ // Sort by descending length.
+ llvm::sort(Prefixes,
+ [](auto A, auto B) { return A.first.size() > B.first.size(); });
+ }
+
+ /// Find the next match of a prefix in Buffer.
+ /// Returns empty StringRef if not found.
+ StringRef match(StringRef Buffer) {
+ assert(Buffer.data() >= Input.data() &&
+ Buffer.data() + Buffer.size() == Input.data() + Input.size() &&
+ "Buffer must be suffix of Input");
+
+ size_t From = Buffer.data() - Input.data();
+ StringRef Match;
+ for (auto &[Prefix, Pos] : Prefixes) {
+ // If the last occurrence was before From, find the next one after From.
+ if (Pos < From)
+ Pos = Input.find(Prefix, From);
+ // Find the first prefix with the lowest position.
+ if (Pos != StringRef::npos &&
+ (Match.empty() || size_t(Match.data() - Input.data()) > Pos))
+ Match = StringRef(Input.substr(Pos, Prefix.size()));
+ }
+ return Match;
+ }
+};
+
/// Searches the buffer for the first prefix in the prefix regular expression.
///
/// This searches the buffer using the provided regular expression, however it
@@ -1658,20 +1712,16 @@ static size_t SkipWord(StringRef Str, size_t Loc) {
/// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
/// is unspecified.
static std::pair<StringRef, StringRef>
-FindFirstMatchingPrefix(const FileCheckRequest &Req, Regex &PrefixRE,
+FindFirstMatchingPrefix(const FileCheckRequest &Req, PrefixMatcher &Matcher,
StringRef &Buffer, unsigned &LineNumber,
Check::FileCheckType &CheckTy) {
- SmallVector<StringRef, 2> Matches;
-
while (!Buffer.empty()) {
- // Find the first (longest) match using the RE.
- if (!PrefixRE.match(Buffer, &Matches))
+ // Find the first (longest) prefix match.
+ StringRef Prefix = Matcher.match(Buffer);
+ if (Prefix.empty())
// No match at all, bail.
return {StringRef(), StringRef()};
- StringRef Prefix = Matches[0];
- Matches.clear();
-
assert(Prefix.data() >= Buffer.data() &&
Prefix.data() < Buffer.data() + Buffer.size() &&
"Prefix doesn't start inside of buffer!");
@@ -1720,7 +1770,7 @@ FileCheck::FileCheck(FileCheckRequest Req)
FileCheck::~FileCheck() = default;
bool FileCheck::readCheckFile(
- SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
+ SourceMgr &SM, StringRef Buffer,
std::pair<unsigned, unsigned> *ImpPatBufferIDRange) {
if (ImpPatBufferIDRange)
ImpPatBufferIDRange->first = ImpPatBufferIDRange->second = 0;
@@ -1769,6 +1819,8 @@ bool FileCheck::readCheckFile(
// found.
unsigned LineNumber = 1;
+ addDefaultPrefixes(Req);
+ PrefixMatcher Matcher(Req.CheckPrefixes, Req.CommentPrefixes, Buffer);
std::set<StringRef> PrefixesNotFound(Req.CheckPrefixes.begin(),
Req.CheckPrefixes.end());
const size_t DistinctPrefixes = PrefixesNotFound.size();
@@ -1779,7 +1831,7 @@ bool FileCheck::readCheckFile(
StringRef UsedPrefix;
StringRef AfterSuffix;
std::tie(UsedPrefix, AfterSuffix) =
- FindFirstMatchingPrefix(Req, PrefixRE, Buffer, LineNumber, CheckTy);
+ FindFirstMatchingPrefix(Req, Matcher, Buffer, LineNumber, CheckTy);
if (UsedPrefix.empty())
break;
if (CheckTy != Check::CheckComment)
@@ -2431,9 +2483,6 @@ static bool ValidatePrefixes(StringRef Kind, StringSet<> &UniquePrefixes,
return true;
}
-static const char *DefaultCheckPrefixes[] = {"CHECK"};
-static const char *DefaultCommentPrefixes[] = {"COM", "RUN"};
-
bool FileCheck::ValidateCheckPrefixes() {
StringSet<> UniquePrefixes;
// Add default prefixes to catch user-supplied duplicates of them below.
@@ -2454,33 +2503,6 @@ bool FileCheck::ValidateCheckPrefixes() {
return true;
}
-Regex FileCheck::buildCheckPrefixRegex() {
- if (Req.CheckPrefixes.empty()) {
- for (const char *Prefix : DefaultCheckPrefixes)
- Req.CheckPrefixes.push_back(Prefix);
- Req.IsDefaultCheckPrefix = true;
- }
- if (Req.CommentPrefixes.empty()) {
- for (const char *Prefix : DefaultCommentPrefixes)
- Req.CommentPrefixes.push_back(Prefix);
- }
-
- // We already validated the contents of CheckPrefixes and CommentPrefixes so
- // just concatenate them as alternatives.
- SmallString<32> PrefixRegexStr;
- for (size_t I = 0, E = Req.CheckPrefixes.size(); I != E; ++I) {
- if (I != 0)
- PrefixRegexStr.push_back('|');
- PrefixRegexStr.append(Req.CheckPrefixes[I]);
- }
- for (StringRef Prefix : Req.CommentPrefixes) {
- PrefixRegexStr.push_back('|');
- PrefixRegexStr.append(Prefix);
- }
-
- return Regex(PrefixRegexStr);
-}
-
Error FileCheckPatternContext::defineCmdlineVariables(
ArrayRef<StringRef> CmdlineDefines, SourceMgr &SM) {
assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() &&