aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHaojian Wu <hokein.wu@gmail.com>2022-07-21 10:18:33 +0200
committerHaojian Wu <hokein.wu@gmail.com>2022-07-22 09:13:09 +0200
commit2a88fb2ecb72300bfbbc74c586fb415cc18c9f9d (patch)
treef8711ab1aca12afd44d84535261bc48901be11cc
parent9daf945367044927f92d5cfd4bf2e94352c067fa (diff)
downloadllvm-2a88fb2ecb72300bfbbc74c586fb415cc18c9f9d.zip
llvm-2a88fb2ecb72300bfbbc74c586fb415cc18c9f9d.tar.gz
llvm-2a88fb2ecb72300bfbbc74c586fb415cc18c9f9d.tar.bz2
[pseudo] Eliminate the dangling-else syntax ambiguity.
- The grammar ambiguity is eliminated by a guard. - The guard function signatures are modified: all parameters are now folded into a single object to avoid a long parameter list (as we will add more parameters in the near future). Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D130160
-rw-r--r--clang-tools-extra/pseudo/include/clang-pseudo/Language.h9
-rw-r--r--clang-tools-extra/pseudo/lib/GLR.cpp2
-rw-r--r--clang-tools-extra/pseudo/lib/cxx/CXX.cpp17
-rw-r--r--clang-tools-extra/pseudo/lib/cxx/cxx.bnf2
-rw-r--r--clang-tools-extra/pseudo/test/cxx/dangling-else.cpp22
-rw-r--r--clang-tools-extra/pseudo/unittests/GLRTest.cpp11
6 files changed, 50 insertions, 13 deletions
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
index 3696543..1a2b71f 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
@@ -19,6 +19,12 @@ class ForestNode;
class TokenStream;
class LRTable;
+struct GuardParams {
+ llvm::ArrayRef<const ForestNode *> RHS;
+ const TokenStream &Tokens;
+ // FIXME: identify the lookahead by its index in Tokens rather than by its SymbolID.
+ SymbolID Lookahead;
+};
// A guard restricts when a grammar rule can be used.
//
// The GLR parser will use the guard to determine whether a rule reduction will
@@ -26,8 +32,7 @@ class LRTable;
// `virt-specifier := IDENTIFIER` only if the identifier's text is `override`.
//
// Return true if the guard is satisfied.
-using RuleGuard = llvm::function_ref<bool(
- llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &)>;
+using RuleGuard = llvm::function_ref<bool(const GuardParams &)>;
// A recovery strategy determines a region of code to skip when parsing fails.
//
diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp
index df8381d..ab230ac 100644
--- a/clang-tools-extra/pseudo/lib/GLR.cpp
+++ b/clang-tools-extra/pseudo/lib/GLR.cpp
@@ -421,7 +421,7 @@ private:
if (!R.Guarded)
return true;
if (auto Guard = Lang.Guards.lookup(RID))
- return Guard(RHS, Params.Code);
+ return Guard({RHS, Params.Code, Lookahead});
LLVM_DEBUG(llvm::dbgs()
<< llvm::formatv("missing guard implementation for rule {0}\n",
Lang.G.dumpRule(RID)));
diff --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
index 8fa24bf..7fc3a48 100644
--- a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -156,15 +156,19 @@ bool isFunctionDeclarator(const ForestNode *Declarator) {
llvm_unreachable("unreachable");
}
+bool guardNextTokenNotElse(const GuardParams &P) {
+ return symbolToToken(P.Lookahead) != tok::kw_else;
+}
+
llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
#define TOKEN_GUARD(kind, cond) \
- [](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) { \
- const Token &Tok = onlyToken(tok::kind, RHS, Tokens); \
+ [](const GuardParams& P) { \
+ const Token &Tok = onlyToken(tok::kind, P.RHS, P.Tokens); \
return cond; \
}
#define SYMBOL_GUARD(kind, cond) \
- [](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) { \
- const ForestNode &N = onlySymbol((SymbolID)Symbol::kind, RHS, Tokens); \
+ [](const GuardParams& P) { \
+ const ForestNode &N = onlySymbol((SymbolID)Symbol::kind, P.RHS, P.Tokens); \
return cond; \
}
return {
@@ -186,6 +190,11 @@ llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
{(RuleID)Rule::contextual_zero_0numeric_constant,
TOKEN_GUARD(numeric_constant, Tok.text() == "0")},
+ {(RuleID)Rule::selection_statement_0if_1l_paren_2condition_3r_paren_4statement,
+ guardNextTokenNotElse},
+ {(RuleID)Rule::selection_statement_0if_1constexpr_2l_paren_3condition_4r_paren_5statement,
+ guardNextTokenNotElse},
+
// The grammar distinguishes (only) user-defined vs plain string literals,
// where the clang lexer distinguishes (only) encoding types.
{(RuleID)Rule::user_defined_string_literal_chunk_0string_literal,
diff --git a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
index d49fb8f..8138d0f 100644
--- a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
+++ b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
@@ -290,7 +290,7 @@ expression-statement := expression_opt ;
compound-statement := { statement-seq_opt [recover=Brackets] }
statement-seq := statement
statement-seq := statement-seq statement
-selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement
+selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement [guard]
selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement ELSE statement
selection-statement := SWITCH ( init-statement_opt condition ) statement
iteration-statement := WHILE ( condition ) statement
diff --git a/clang-tools-extra/pseudo/test/cxx/dangling-else.cpp b/clang-tools-extra/pseudo/test/cxx/dangling-else.cpp
new file mode 100644
index 0000000..151f393
--- /dev/null
+++ b/clang-tools-extra/pseudo/test/cxx/dangling-else.cpp
@@ -0,0 +1,22 @@
+// RUN: clang-pseudo -grammar=cxx -source=%s --start-symbol=statement-seq --print-forest | FileCheck %s
+
+// Verify that the else binds to the nested (inner) if statement.
+if (true) if (true) {} else {}
+
+// CHECK: statement-seq~selection-statement := IF ( condition ) statement
+// CHECK-NEXT: ├─IF
+// CHECK-NEXT: ├─(
+// CHECK-NEXT: ├─condition~TRUE
+// CHECK-NEXT: ├─)
+// CHECK-NEXT: └─statement~selection-statement
+// CHECK-NEXT: ├─IF
+// CHECK-NEXT: ├─(
+// CHECK-NEXT: ├─condition~TRUE
+// CHECK-NEXT: ├─)
+// CHECK-NEXT: ├─statement~compound-statement := { }
+// CHECK-NEXT: │ ├─{
+// CHECK-NEXT: │ └─}
+// CHECK-NEXT: ├─ELSE
+// CHECK-NEXT: └─statement~compound-statement := { }
+// CHECK-NEXT: ├─{
+// CHECK-NEXT: └─}
diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
index 2c3ef26..5f87efe 100644
--- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -634,11 +634,12 @@ TEST_F(GLRTest, GuardExtension) {
start := IDENTIFIER [guard]
)bnf");
TestLang.Guards.try_emplace(
- ruleFor("start"),
- [&](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) {
- assert(RHS.size() == 1 &&
- RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
- return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "test";
+ ruleFor("start"), [&](const GuardParams &P) {
+ assert(P.RHS.size() == 1 &&
+ P.RHS.front()->symbol() ==
+ tokenSymbol(clang::tok::identifier));
+ return P.Tokens.tokens()[P.RHS.front()->startTokenIndex()]
+ .text() == "test";
});
clang::LangOptions LOptions;
TestLang.Table = LRTable::buildSLR(TestLang.G);