/** * Takes a token stream from the lexer, and parses it into an abstract syntax tree. * * Specification: C11 * * Copyright: Copyright (C) 1999-2022 by The D Language Foundation, All Rights Reserved * Authors: $(LINK2 https://www.digitalmars.com, Walter Bright) * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/cparse.d, _cparse.d) * Documentation: https://dlang.org/phobos/dmd_cparse.html * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/cparse.d */ module dmd.cparse; import core.stdc.stdio; import core.stdc.string; import dmd.astenums; import dmd.globals; import dmd.id; import dmd.identifier; import dmd.lexer; import dmd.parse; import dmd.errors; import dmd.root.array; import dmd.root.filename; import dmd.common.outbuffer; import dmd.root.rmem; import dmd.root.rootobject; import dmd.root.string; import dmd.tokens; /*********************************************************** */ final class CParser(AST) : Parser!AST { AST.Dsymbols* symbols; // symbols declared in current scope bool addFuncName; /// add declaration of __func__ to function symbol table bool importBuiltins; /// seen use of C compiler builtins, so import __builtins; private { structalign_t packalign; // current state of #pragma pack alignment // #pragma pack stack Array!Identifier* records; // identifers (or null) Array!structalign_t* packs; // parallel alignment values } /* C cannot be parsed without determining if an identifier is a type or a variable. * For expressions like `(T)-3`, is it a cast or a minus expression? * It also occurs with `typedef int (F)(); F fun;` * but to build the AST we need to distinguish `fun` being a function as opposed to a variable. * To fix, build a symbol table for the typedefs. * Symbol table of typedefs indexed by Identifier cast to void*. * 1. if an identifier is a typedef, then it will return a non-null Type * 2. 
if an identifier is not a typedef, then it will return null
     */
    Array!(void*) typedefTab;  /// Array of AST.Type[Identifier], typedef's indexed by Identifier

    /* This is passed in as a list of #define lines, as generated by the C preprocessor with the
     * appropriate switch to emit them. We append to it any #define's and #undef's encountered in the source
     * file, as cpp with the -dD embeds them in the preprocessed output file.
     * Once the file is parsed, then the #define's are converted to D symbols and appended to the array
     * of Dsymbols returned by parseModule().
     */
    OutBuffer* defines;

    /********************************************
     * Set up a parser for one C source buffer.
     *
     * Params:
     *      _module = module being parsed; handed to the base parser and stored in `mod`
     *      input = source text, handed to the base parser
     *      doDocComment = handed to the base parser unchanged
     *      target = provides the sizes of the C basic types copied below
     *      defines = buffer of `#define` lines, stored in the `defines` field
     */
    extern (D) this(TARGET)(AST.Module _module, const(char)[] input, bool doDocComment,
        const ref TARGET target, OutBuffer* defines)
    {
        super(_module, input, doDocComment);
        //printf("CParser.this()\n");

        // parser state that marks this as a C compilation
        mod = _module;
        linkage = LINK.c;
        Ccompile = true;

        // per-instance state owned by CParser
        this.defines = defines;
        this.packalign.setDefault();

        // Configure sizes for C `long`, `long double`, `wchar_t`, ...
        this.boolsize        = target.boolsize;
        this.shortsize       = target.shortsize;
        this.intsize         = target.intsize;
        this.longsize        = target.longsize;
        this.long_longsize   = target.long_longsize;
        this.long_doublesize = target.long_doublesize;
        this.wchar_tsize     = target.wchar_tsize;

        // C `char` is always unsigned in ImportC
    }

    /********************************************
     * Parse translation unit.
* C11 6.9
     * translation-unit:
     *    external-declaration
     *    translation-unit external-declaration
     *
     * external-declaration:
     *    function-definition
     *    declaration
     *
     * Declarations are collected in `symbols`; once end of file is reached the
     * collected `#define`s are converted, and everything is wrapped in an
     * `extern (C)` link declaration.
     * Returns:
     *  array of Dsymbols that were declared
     */
    override AST.Dsymbols* parseModule()
    {
        //printf("cparseTranslationUnit()\n");
        symbols = new AST.Dsymbols();
        typedefTab.push(null);  // C11 6.2.1-3 symbol table for "file scope"

        // external-declarations, one after another, until the input runs out
        while (token.value != TOK.endOfFile)
            cparseDeclaration(LVL.global);

        addDefines();   // convert #define's to Dsymbols

        // wrap the symbols in `extern (C) { symbols }`
        auto wrapped = new AST.Dsymbols();
        wrapped.push(new AST.LinkDeclaration(token.loc, LINK.c, symbols));

        if (importBuiltins)
        {
            /* Seen references to C builtin functions.
             * Import their definitions
             */
            wrapped.push(new AST.Import(Loc.initial, null, Id.builtins, null, false));
        }

        // end of file scope
        typedefTab.pop();
        assert(typedefTab.length == 0);

        return wrapped;
    }

    /******************************************************************************/
    /********************************* Statement Parser ***************************/

    //{
    /**********************
     * C11 6.8
     * statement:
     *    labeled-statement
     *    compound-statement
     *    expression-statement
     *    selection-statement
     *    iteration-statement
     *    jump-statement
     *
     * Params:
     *      flags = PSxxxx
     *      endPtr = store location of closing brace
     *      pEndloc = if { ... statements ...
}, store location of closing brace, otherwise loc of last token of statement
     *
     * On entry the current `typedefTab` length and `symbols` array are saved; both
     * are restored just before returning. A new typedef scope is pushed when
     * `ParseStatementFlags.scope_` is set, and a fresh `symbols` array is used
     * unless `scope_` or `curlyScope` is set.
     * Returns:
     *      parsed statement
     */
    AST.Statement cparseStatement(int flags, const(char)** endPtr = null, Loc* pEndloc = null)
    {
        AST.Statement s;
        const loc = token.loc;

        //printf("cparseStatement()\n");

        // Remember the enclosing scope state so it can be restored on exit
        const typedefTabLengthSave = typedefTab.length;
        auto symbolsSave = symbols;
        if (flags & ParseStatementFlags.scope_)
        {
            typedefTab.push(null);      // introduce new block scope
        }

        if (!(flags & (ParseStatementFlags.scope_ | ParseStatementFlags.curlyScope)))
        {
            symbols = new AST.Dsymbols();
        }

        switch (token.value)
        {
        case TOK.identifier:
            /* A leading identifier can be a declaration, label, or expression.
             * A quick check of the next token can disambiguate most cases.
             */
            switch (peekNext())
            {
                case TOK.colon:
                {
                    // It's a label
                    auto ident = token.ident;
                    nextToken();    // advance to `:`
                    nextToken();    // advance past `:`
                    if (token.value == TOK.rightCurly)
                        s = null;   // label directly before `}`: no labeled statement
                    else if (token.value == TOK.leftCurly)
                        s = cparseStatement(ParseStatementFlags.curly | ParseStatementFlags.scope_);
                    else
                        s = cparseStatement(ParseStatementFlags.semiOk);
                    s = new AST.LabelStatement(loc, ident, s);
                    break;
                }

                // these tokens can only continue an expression
                case TOK.dot:
                case TOK.arrow:
                case TOK.plusPlus:
                case TOK.minusMinus:
                case TOK.leftBracket:
                case TOK.question:
                case TOK.assign:
                case TOK.addAssign:
                case TOK.minAssign:
                case TOK.mulAssign:
                case TOK.divAssign:
                case TOK.modAssign:
                case TOK.andAssign:
                case TOK.orAssign:
                case TOK.xorAssign:
                case TOK.leftShiftAssign:
                case TOK.rightShiftAssign:
                    goto Lexp;

                case TOK.leftParenthesis:
                    // `T (` is a declaration only when T is a known typedef
                    if (auto pt = lookupTypedef(token.ident))
                    {
                        if (*pt)
                            goto Ldeclaration;
                    }
                    goto Lexp;  // function call

                default:
                {
                    /* If tokens look like a declaration, assume it is one
                     */
                    auto tk = &token;
                    if (isCDeclaration(tk))
                        goto Ldeclaration;
                    goto Lexp;
                }
            }
            break;

        // tokens that can start an expression-statement
        case TOK.charLiteral:
        case TOK.int32Literal:
        case TOK.uns32Literal:
        case TOK.int64Literal:
        case TOK.uns64Literal:
        case TOK.int128Literal:
        case TOK.uns128Literal:
        case TOK.float32Literal:
        case TOK.float64Literal:
        case TOK.float80Literal:
        case TOK.imaginary32Literal:
        case TOK.imaginary64Literal:
        case TOK.imaginary80Literal:
        case TOK.leftParenthesis:
        case TOK.and:
        case TOK.mul:
        case TOK.min:
        case TOK.add:
        case TOK.tilde:
        case TOK.not:
        case TOK.plusPlus:
        case TOK.minusMinus:
        case TOK.sizeof_:
        case TOK._Generic:
        Lexp:
            auto exp = cparseExpression();
            // `identifier identifier` usually means a declaration whose type was not recognized
            if (token.value == TOK.identifier && exp.op == EXP.identifier)
            {
                error("found `%s` when expecting `;` or `=`, did you mean `%s %s = %s`?", peek(&token).toChars(), exp.toChars(), token.toChars(), peek(peek(&token)).toChars());
                nextToken();
            }
            else
                check(TOK.semicolon, "statement");
            s = new AST.ExpStatement(loc, exp);
            break;

        // type-specifiers
        case TOK.void_:
        case TOK.char_:
        case TOK.int16:
        case TOK.int32:
        case TOK.int64:
        case TOK.float32:
        case TOK.float64:
        case TOK.signed:
        case TOK.unsigned:
        case TOK._Bool:
        //case TOK._Imaginary:
        case TOK._Complex:
        case TOK.struct_:
        case TOK.union_:
        case TOK.enum_:
        case TOK.typeof_:

        // storage-class-specifiers
        case TOK.typedef_:
        case TOK.extern_:
        case TOK.static_:
        case TOK._Thread_local:
        case TOK.auto_:
        case TOK.register:

        // function-specifiers
        case TOK.inline:
        case TOK._Noreturn:

        // type-qualifiers
        case TOK.const_:
        case TOK.volatile:
        case TOK.restrict:
        case TOK.__stdcall:

        // alignment-specifier
        case TOK._Alignas:

        // atomic-type-specifier or type_qualifier
        case TOK._Atomic:

        Ldeclaration:
        {
            // The declaration parser appends to `symbols`; turn what it
            // produced into statement(s) and empty the array again.
            cparseDeclaration(LVL.local);
            if (symbols.length > 1)
            {
                auto as = new AST.Statements();
                as.reserve(symbols.length);
                foreach (d; (*symbols)[])
                {
                    s = new AST.ExpStatement(loc, d);
                    as.push(s);
                }
                s = new AST.CompoundDeclarationStatement(loc, as);
                symbols.setDim(0);
            }
            else if (symbols.length == 1)
            {
                auto d = (*symbols)[0];
                s = new AST.ExpStatement(loc, d);
                symbols.setDim(0);
            }
            else
                s = new AST.ExpStatement(loc, cast(AST.Expression)null);
            if (flags & ParseStatementFlags.scope_)
                s = new AST.ScopeStatement(loc, s, token.loc);
            break;
        }

        case TOK._Static_assert:        // _Static_assert ( constant-expression, string-literal ) ;
            s = new AST.StaticAssertStatement(cparseStaticAssert());
            break;

        case TOK.leftCurly:
        {
            /* C11 6.8.2
             * compound-statement:
             *    { block-item-list (opt) }
             *
             * block-item-list:
             *    block-item
             *    block-item-list block-item
             *
             * block-item:
             *    declaration
             *    statement
             */
            nextToken();
            auto statements = new AST.Statements();
            while (token.value != TOK.rightCurly && token.value != TOK.endOfFile)
            {
                statements.push(cparseStatement(ParseStatementFlags.semi | ParseStatementFlags.curlyScope));
            }
            if (endPtr)
                *endPtr = token.ptr;
            endloc = token.loc;
            if (pEndloc)
            {
                *pEndloc = token.loc;
                pEndloc = null; // don't set it again
            }
            s = new AST.CompoundStatement(loc, statements);
            if (flags & (ParseStatementFlags.scope_ | ParseStatementFlags.curlyScope))
                s = new AST.ScopeStatement(loc, s, token.loc);
            check(TOK.rightCurly, "compound statement");
            break;
        }

        case TOK.while_:
        {
            nextToken();
            check(TOK.leftParenthesis);
            auto condition = cparseExpression();
            check(TOK.rightParenthesis);
            Loc endloc;
            auto _body = cparseStatement(ParseStatementFlags.scope_, null, &endloc);
            s = new AST.WhileStatement(loc, condition, _body, endloc, null);
            break;
        }

        case TOK.semicolon:
            /* C11 6.8.3 null statement
             */
            nextToken();
            s = new AST.ExpStatement(loc, cast(AST.Expression)null);
            break;

        case TOK.do_:
        {
            nextToken();
            auto _body = cparseStatement(ParseStatementFlags.scope_);
            check(TOK.while_);
            check(TOK.leftParenthesis);
            auto condition = cparseExpression();
            check(TOK.rightParenthesis);
            check(TOK.semicolon, "terminating `;` required after do-while statement");
            s = new AST.DoStatement(loc, _body, condition, token.loc);
            break;
        }

        case TOK.for_:
        {
            AST.Statement _init;
            AST.Expression condition;
            AST.Expression increment;

            nextToken();
            check(TOK.leftParenthesis);
            // clause-1: empty, or a statement (which consumes its own `;`)
            if (token.value == TOK.semicolon)
            {
                _init = null;
                nextToken();
            }
            else
            {
                _init = cparseStatement(0);
            }
            // expression-2 (opt) ;
            if (token.value == TOK.semicolon)
            {
                condition = null;
                nextToken();
            }
            else
            {
                condition = cparseExpression();
                check(TOK.semicolon, "`for` condition");
            }
            // expression-3 (opt) )
            if (token.value == TOK.rightParenthesis)
            {
                increment = null;
                nextToken();
            }
            else
            {
                increment = cparseExpression();
                check(TOK.rightParenthesis);
            }

            Loc endloc;
            auto _body = cparseStatement(ParseStatementFlags.scope_, null, &endloc);
            s = new AST.ForStatement(loc, _init, condition, increment, _body, endloc);
            break;
        }

        case TOK.if_:
        {
            nextToken();
            check(TOK.leftParenthesis);
            auto condition = cparseExpression();
            check(TOK.rightParenthesis);
            auto ifbody = cparseStatement(ParseStatementFlags.scope_);
            AST.Statement elsebody;
            if (token.value == TOK.else_)
            {
                nextToken();
                elsebody = cparseStatement(ParseStatementFlags.scope_);
            }
            else
                elsebody = null;
            if (condition && ifbody)
                s = new AST.IfStatement(loc, null, condition, ifbody, elsebody, token.loc);
            else
                s = null; // don't propagate parsing errors
            break;
        }

        case TOK.else_:
            error("found `else` without a corresponding `if` statement");
            goto Lerror;

        case TOK.switch_:
        {
            nextToken();
            check(TOK.leftParenthesis);
            auto condition = cparseExpression();
            check(TOK.rightParenthesis);
            auto _body = cparseStatement(ParseStatementFlags.scope_);
            s = new AST.SwitchStatement(loc, condition, _body, false);
            break;
        }

        case TOK.case_:
        {
            nextToken();
            auto exp = cparseAssignExp();
            check(TOK.colon);

            if (flags & ParseStatementFlags.curlyScope)
            {
                // Inside a `{ }` block: gather the statements that follow the
                // label up to the next case/default label, `}` or end of file
                auto statements = new AST.Statements();
                while (token.value != TOK.case_ && token.value != TOK.default_ && token.value != TOK.endOfFile && token.value != TOK.rightCurly)
                {
                    auto cur = cparseStatement(ParseStatementFlags.semi | ParseStatementFlags.curlyScope);
                    statements.push(cur);

                    // https://issues.dlang.org/show_bug.cgi?id=21739
                    // Stop at the last break s.t. the following non-case statements are
                    // not merged into the current case. This can happen for
                    // case 1: ... break;
                    // debug { case 2: ... }
                    if (cur && cur.isBreakStatement())
                        break;
                }
                s = new AST.CompoundStatement(loc, statements);
            }
            else
            {
                s = cparseStatement(ParseStatementFlags.semi);
            }
            s = new AST.ScopeStatement(loc, s, token.loc);
            s = new AST.CaseStatement(loc, exp, s);
            break;
        }

        case TOK.default_:
        {
            nextToken();
            check(TOK.colon);

            if (flags & ParseStatementFlags.curlyScope)
            {
                // same gathering as for `case`, but without the stop-at-break rule
                auto statements = new AST.Statements();
                while (token.value != TOK.case_ && token.value != TOK.default_ && token.value != TOK.endOfFile && token.value != TOK.rightCurly)
                {
                    statements.push(cparseStatement(ParseStatementFlags.semi | ParseStatementFlags.curlyScope));
                }
                s = new AST.CompoundStatement(loc, statements);
            }
            else
                s = cparseStatement(ParseStatementFlags.semi);
            s = new AST.ScopeStatement(loc, s, token.loc);
            s = new AST.DefaultStatement(loc, s);
            break;
        }

        case TOK.return_:
        {
            /*  return ;
             *  return expression ;
             */
            nextToken();
            auto exp = token.value == TOK.semicolon ? null : cparseExpression();
            check(TOK.semicolon, "`return` statement");
            s = new AST.ReturnStatement(loc, exp);
            break;
        }

        case TOK.break_:
            nextToken();
            check(TOK.semicolon, "`break` statement");
            s = new AST.BreakStatement(loc, null);
            break;

        case TOK.continue_:
            nextToken();
            check(TOK.semicolon, "`continue` statement");
            s = new AST.ContinueStatement(loc, null);
            break;

        case TOK.goto_:
        {
            Identifier ident;
            nextToken();
            if (token.value != TOK.identifier)
            {
                error("identifier expected following `goto`");
                ident = null;
            }
            else
            {
                ident = token.ident;
                nextToken();
            }
            s = new AST.GotoStatement(loc, ident);
            check(TOK.semicolon, "`goto` statement");
            break;
        }

        case TOK.asm_:
            s = parseAsm();
            break;

        default:
            error("found `%s` instead of statement", token.toChars());
            goto Lerror;

        Lerror:
            // error recovery: resynchronize, swallow a trailing `;`, yield no statement
            panic();
            if (token.value == TOK.semicolon)
                nextToken();
            s = null;
            break;
        }
        // pEndloc was nulled by the compound-statement case if it already stored the `}` location
        if (pEndloc)
            *pEndloc = prevloc;
        // leave the scope: restore what was saved on entry
        symbols = symbolsSave;
        typedefTab.setDim(typedefTabLengthSave);
        return s;
    }
    //}

    /*******************************************************************************/
    /*********************************
Expression Parser ***************************/
    //{

    /**************
     * C11 6.5.17
     * expression:
     *    assignment-expression
     *    expression , assignment-expression
     *
     * Returns:
     *      the assignment-expression, or a left-nested chain of `CommaExp`
     *      when one or more `,` operators follow
     */
    AST.Expression cparseExpression()
    {
        auto loc = token.loc;

        //printf("cparseExpression() loc = %d\n", loc.linnum);
        auto result = cparseAssignExp();
        while (token.value == TOK.comma)
        {
            nextToken();
            auto rhs = cparseAssignExp();
            result = new AST.CommaExp(loc, result, rhs, false);
            loc = token.loc;    // location used if another `,` follows
        }
        return result;
    }

    /*********************
     * C11 6.5.1
     * primary-expression:
     *    identifier
     *    constant
     *    string-literal
     *    ( expression )
     *    generic-selection
     *    __builtin_va_arg(assign_expression, type)
     *
     * Returns:
     *      parsed expression; never null, a placeholder integer 0 is
     *      produced after reporting "expression expected"
     */
    AST.Expression cparsePrimaryExp()
    {
        AST.Expression e;
        const loc = token.loc;

        //printf("parsePrimaryExp(): loc = %d\n", loc.linnum);
        switch (token.value)
        {
        case TOK.identifier:
            const id = token.ident.toString();
            if (id.length > 2 && id[0] == '_' && id[1] == '_')  // leading double underscore
            {
                if (token.ident is Id.__func__)
                {
                    addFuncName = true;     // implicitly declare __func__
                }
                else if (token.ident is Id.builtin_va_arg)
                {
                    e = cparseBuiltin_va_arg();
                    break;
                }
                else
                    importBuiltins = true;  // probably one of those compiler extensions
            }
            e = new AST.IdentifierExp(loc, token.ident);
            nextToken();
            break;

        // integer constants: character literals share the `int` case
        case TOK.charLiteral:
        case TOK.int32Literal:
            e = new AST.IntegerExp(loc, token.intvalue, AST.Type.tint32);
            nextToken();
            break;

        case TOK.uns32Literal:
            e = new AST.IntegerExp(loc, token.unsvalue, AST.Type.tuns32);
            nextToken();
            break;

        case TOK.int64Literal:
            e = new AST.IntegerExp(loc, token.intvalue, AST.Type.tint64);
            nextToken();
            break;

        case TOK.uns64Literal:
            e = new AST.IntegerExp(loc, token.unsvalue, AST.Type.tuns64);
            nextToken();
            break;

        // floating constants
        case TOK.float32Literal:
            e = new AST.RealExp(loc, token.floatvalue, AST.Type.tfloat32);
            nextToken();
            break;

        case TOK.float64Literal:
            e = new AST.RealExp(loc, token.floatvalue, AST.Type.tfloat64);
            nextToken();
            break;

        case TOK.float80Literal:
            e = new AST.RealExp(loc, token.floatvalue, AST.Type.tfloat80);
            nextToken();
            break;

        case TOK.imaginary32Literal:
            e = new AST.RealExp(loc, token.floatvalue, AST.Type.timaginary32);
            nextToken();
            break;

        case TOK.imaginary64Literal:
            e = new AST.RealExp(loc, token.floatvalue, AST.Type.timaginary64);
            nextToken();
            break;

        case TOK.imaginary80Literal:
            e = new AST.RealExp(loc, token.floatvalue, AST.Type.timaginary80);
            nextToken();
            break;

        case TOK.string_:
        {
            // cat adjacent strings
            auto text = token.ustring;
            auto textLen = token.len;
            auto postfix = token.postfix;
            for (nextToken(); token.value == TOK.string_; nextToken())
            {
                if (token.postfix)
                {
                    if (token.postfix != postfix)
                        error("mismatched string literal postfixes `'%c'` and `'%c'`", postfix, token.postfix);
                    postfix = token.postfix;
                }

                // copy what we have so far plus the new piece into a fresh buffer
                const headLen = textLen;
                const tailLen = token.len;
                textLen = headLen + tailLen;
                auto joined = cast(char*)mem.xmalloc_noscan(textLen * char.sizeof);
                memcpy(joined, text, headLen * char.sizeof);
                memcpy(joined + headLen, token.ustring, tailLen * char.sizeof);
                text = joined;
            }
            e = new AST.StringExp(loc, text[0 .. textLen], textLen, 1, postfix);
            break;
        }

        case TOK.leftParenthesis:
            nextToken();
            e = cparseExpression();
            check(TOK.rightParenthesis);
            break;

        case TOK._Generic:
            e = cparseGenericSelection();
            break;

        default:
            error("expression expected, not `%s`", token.toChars());
            // Anything for e, as long as it's not NULL
            e = new AST.IntegerExp(loc, 0, AST.Type.tint32);
            nextToken();
            break;
        }
        return e;
    }

    /*********************************
     * C11 6.5.2
     * postfix-expression:
     *    primary-expression
     *    postfix-expression [ expression ]
     *    postfix-expression ( argument-expression-list (opt) )
     *    postfix-expression .
identifier
     *    postfix-expression -> identifier
     *    postfix-expression ++
     *    postfix-expression --
     *    ( type-name ) { initializer-list }
     *    ( type-name ) { initializer-list , }
     *
     * argument-expression-list:
     *    assignment-expression
     *    argument-expression-list , assignment-expression
     *
     * Params:
     *      e = ignored; the primary expression is parsed here
     * Returns:
     *      parsed postfix expression
     */
    private AST.Expression cparsePostfixExp(AST.Expression e)
    {
        return cparsePostfixOperators(cparsePrimaryExp());
    }

    /********************************
     * C11 6.5.2
     * Parse a series of operators for a postfix expression after already parsing
     * a primary-expression or compound literal expression.
     * Params:
     *      e = parsed primary or compound literal expression
     * Returns:
     *      parsed postfix expression
     */
    private AST.Expression cparsePostfixOperators(AST.Expression e)
    {
        while (1)
        {
            const loc = token.loc;
            switch (token.value)
            {
            case TOK.dot:
                nextToken();
                if (token.value != TOK.identifier)
                {
                    error("identifier expected following `.`, not `%s`", token.toChars());
                    break;
                }
                e = new AST.DotIdExp(loc, e, token.ident);
                break;

            case TOK.arrow:
                nextToken();
                if (token.value != TOK.identifier)
                {
                    error("identifier expected following `->`, not `%s`", token.toChars());
                    break;
                }
                auto member = new AST.DotIdExp(loc, e, token.ident);
                member.arrow = true;
                e = member;
                break;

            case TOK.plusPlus:
                e = new AST.PostExp(EXP.plusPlus, loc, e);
                break;

            case TOK.minusMinus:
                e = new AST.PostExp(EXP.minusMinus, loc, e);
                break;

            case TOK.leftParenthesis:
                e = new AST.CallExp(loc, e, cparseArguments());
                continue;   // skip the nextToken() below

            case TOK.leftBracket:
            {
                // array dereferences:
                //      array[index]
                auto indices = new AST.Expressions();

                inBrackets++;
                nextToken();
                indices.push(cparseAssignExp());
                check(TOK.rightBracket);
                inBrackets--;
                e = new AST.ArrayExp(loc, e, indices);
                continue;   // skip the nextToken() below
            }

            default:
                return e;
            }
            // cases that `break` out of the switch advance one token here
            nextToken();
        }
    }

    /************************
     * C11 6.5.3
     * unary-expression:
     *    postfix-expression
     *    ++ unary-expression
     *    -- unary-expression
     *    unary-operator cast-expression
     *    sizeof
unary-expression
     *    sizeof ( type-name )
     *    _Alignof ( type-name )
     *
     * unary-operator:
     *    & * + - ~ !
     *
     * Returns:
     *      parsed expression, never null
     */
    private AST.Expression cparseUnaryExp()
    {
        AST.Expression e;
        const loc = token.loc;

        switch (token.value)
        {
        case TOK.plusPlus:
            nextToken();
            // Parse `++` as an unary operator so that cast expressions only give
            // an error for being non-lvalues.
            e = cparseCastExp();
            e = new AST.PreExp(EXP.prePlusPlus, loc, e);
            break;

        case TOK.minusMinus:
            nextToken();
            // Parse `--` as an unary operator, same as prefix increment.
            e = cparseCastExp();
            e = new AST.PreExp(EXP.preMinusMinus, loc, e);
            break;

        case TOK.and:
            nextToken();
            e = cparseCastExp();
            e = new AST.AddrExp(loc, e);
            break;

        case TOK.mul:
            nextToken();
            e = cparseCastExp();
            e = new AST.PtrExp(loc, e);
            break;

        case TOK.min:
            nextToken();
            e = cparseCastExp();
            e = new AST.NegExp(loc, e);
            break;

        case TOK.add:
            nextToken();
            e = cparseCastExp();
            e = new AST.UAddExp(loc, e);
            break;

        case TOK.not:
            nextToken();
            e = cparseCastExp();
            e = new AST.NotExp(loc, e);
            break;

        case TOK.tilde:
            nextToken();
            e = cparseCastExp();
            e = new AST.ComExp(loc, e);
            break;

        case TOK.sizeof_:
        {
            nextToken();
            if (token.value == TOK.leftParenthesis)
            {
                auto tk = peek(&token);
                if (isTypeName(tk))
                {
                    /* Expression may be either be requesting the sizeof a type-name
                     * or a compound literal, which requires checking whether
                     * the next token is leftCurly
                     */
                    nextToken();
                    auto t = cparseTypeName();
                    check(TOK.rightParenthesis);
                    if (token.value == TOK.leftCurly)
                    {
                        // ( type-name ) { initializer-list }
                        auto ci = cparseInitializer();
                        e = new AST.CompoundLiteralExp(loc, t, ci);
                        e = cparsePostfixOperators(e);
                    }
                    else
                    {
                        // ( type-name )
                        e = new AST.TypeExp(loc, t);
                    }
                }
                else
                {
                    // must be an expression
                    e = cparseUnaryExp();
                }
            }
            else
            {
                //C11 6.5.3
                e = cparseUnaryExp();
            }

            // sizeof is represented as the `__sizeof` property of its operand
            e = new AST.DotIdExp(loc, e, Id.__sizeof);
            break;
        }

        case TOK._Alignof:
        {
            nextToken();
            check(TOK.leftParenthesis);
            auto t = cparseTypeName();
            check(TOK.rightParenthesis);
            e = new AST.TypeExp(loc, t);
            e = new AST.DotIdExp(loc, e, Id.__xalignof);
            break;
        }

        default:
            e = cparsePostfixExp(e);
            break;
        }
        assert(e);
        return e;
    }

    /**************
     * C11 6.5.4
     * cast-expression
     *    unary-expression
     *    ( type-name ) cast-expression
     *
     * A leading `(` may open a cast, a compound literal, or a parenthesized
     * expression; the typedef table and `isCastExpression()` decide which.
     */
    private AST.Expression cparseCastExp()
    {
        if (token.value == TOK.leftParenthesis)
        {
            //printf("cparseCastExp()\n");
            auto tk = peek(&token);
            bool iscast;
            bool isexp;
            if (tk.value == TOK.identifier)
            {
                iscast = isTypedef(tk.ident);
                isexp = !iscast;
            }
            if (isexp)
            {
                // ( identifier ) is an expression
                return cparseUnaryExp();
            }

            // If ( type-name )
            auto pt = &token;

            if (isCastExpression(pt))
            {
                // Expression may be either a cast or a compound literal, which
                // requires checking whether the next token is leftCurly
                const loc = token.loc;
                nextToken();
                auto t = cparseTypeName();
                check(TOK.rightParenthesis);
                pt = &token;

                if (token.value == TOK.leftCurly)
                {
                    // C11 6.5.2.5 ( type-name ) { initializer-list }
                    auto ci = cparseInitializer();
                    auto ce = new AST.CompoundLiteralExp(loc, t, ci);
                    return cparsePostfixOperators(ce);
                }

                if (iscast)
                {
                    // ( type-name ) cast-expression
                    auto ce = cparseCastExp();
                    return new AST.CastExp(loc, ce, t);
                }

                // NOTE(review): `isexp` is always false at this point because the
                // `isexp` case returned above, so this branch cannot be taken as
                // written. Left untouched; confirm the intent before relying on it.
                if (t.isTypeIdentifier() &&
                    isexp &&
                    token.value == TOK.leftParenthesis &&
                    !isCastExpression(pt))
                {
                    /* (t)(...)... might be a cast expression or a function call,
                     * with different grammars: a cast would be cparseCastExp(),
                     * a function call would be cparsePostfixExp(CallExp(cparseArguments())).
                     * We can't know until t is known. So, parse it as a function call
                     * and let semantic() rewrite the AST as a CastExp if it turns out
                     * to be a type.
                     */
                    auto ie = new AST.IdentifierExp(loc, t.isTypeIdentifier().ident);
                    ie.parens = true;    // let semantic know it might be a CastExp
                    AST.Expression e = new AST.CallExp(loc, ie, cparseArguments());
                    return cparsePostfixOperators(e);
                }

                // ( type-name ) cast-expression
                auto ce = cparseCastExp();
                return new AST.CastExp(loc, ce, t);
            }
        }
        return cparseUnaryExp();
    }

    /**************
     * C11 6.5.5
     * multiplicative-expression
     *    cast-expression
     *    multiplicative-expression * cast-expression
     *    multiplicative-expression / cast-expression
     *    multiplicative-expression % cast-expression
     */
    private AST.Expression cparseMulExp()
    {
        const loc = token.loc;
        auto e = cparseCastExp();

        while (1)
        {
            switch (token.value)
            {
                case TOK.mul:
                    nextToken();
                    auto e2 = cparseCastExp();
                    e = new AST.MulExp(loc, e, e2);
                    continue;

                case TOK.div:
                    nextToken();
                    auto e2 = cparseCastExp();
                    e = new AST.DivExp(loc, e, e2);
                    continue;

                case TOK.mod:
                    nextToken();
                    auto e2 = cparseCastExp();
                    e = new AST.ModExp(loc, e, e2);
                    continue;

                default:
                    break;
            }
            break;
        }
        return e;
    }

    /**************
     * C11 6.5.6
     * additive-expression
     *    multiplicative-expression
     *    additive-expression + multiplicative-expression
     *    additive-expression - multiplicative-expression
     */
    private AST.Expression cparseAddExp()
    {
        const loc = token.loc;
        auto e = cparseMulExp();

        while (1)
        {
            switch (token.value)
            {
                case TOK.add:
                    nextToken();
                    auto e2 = cparseMulExp();
                    e = new AST.AddExp(loc, e, e2);
                    continue;

                case TOK.min:
                    nextToken();
                    auto e2 = cparseMulExp();
                    e = new AST.MinExp(loc, e, e2);
                    continue;

                default:
                    break;
            }
            break;
        }
        return e;
    }

    /**************
     * C11 6.5.7
     * shift-expression
     *    additive-expression
     *    shift-expression << additive-expression
     *    shift-expression >> additive-expression
     */
    private AST.Expression cparseShiftExp()
    {
        const loc = token.loc;
        auto e = cparseAddExp();

        while (1)
        {
            switch (token.value)
            {
                case TOK.leftShift:
                    nextToken();
                    auto e2 = cparseAddExp();
                    e = new AST.ShlExp(loc, e, e2);
                    continue;

                case TOK.rightShift:
                    nextToken();
                    auto e2 = cparseAddExp();
                    e = new AST.ShrExp(loc, e, e2);
                    continue;

                default:
                    break;
            }
            break;
        }
        return e;
    }

    /**************
     * C11 6.5.8
     * relational-expression
     *    shift-expression
     *    relational-expression < shift-expression
     *    relational-expression > shift-expression
     *    relational-expression <= shift-expression
     *    relational-expression >= shift-expression
     *
     * The grammar is left-recursive, so operators are consumed in a loop and
     * `a < b < c` is built as `(a < b) < c`.
     */
    private AST.Expression cparseRelationalExp()
    {
        const loc = token.loc;

        auto e = cparseShiftExp();

        while (1)
        {
            EXP op = EXP.reserved;
            switch (token.value)
            {
                case TOK.lessThan:       op = EXP.lessThan;       break;
                case TOK.lessOrEqual:    op = EXP.lessOrEqual;    break;
                case TOK.greaterThan:    op = EXP.greaterThan;    break;
                case TOK.greaterOrEqual: op = EXP.greaterOrEqual; break;

                default:
                    return e;   // no (further) relational operator
            }
            nextToken();
            auto e2 = cparseShiftExp();
            e = new AST.CmpExp(op, loc, e, e2);
        }
    }

    /**************
     * C11 6.5.9
     * equality-expression
     *    relational-expression
     *    equality-expression == relational-expression
     *    equality-expression != relational-expression
     *
     * The grammar is left-recursive, so operators are consumed in a loop and
     * `a == b != c` is built as `(a == b) != c`.
     */
    private AST.Expression cparseEqualityExp()
    {
        const loc = token.loc;

        auto e = cparseRelationalExp();

        while (1)
        {
            EXP op = EXP.reserved;
            switch (token.value)
            {
                case TOK.equal:    op = EXP.equal;    break;
                case TOK.notEqual: op = EXP.notEqual; break;

                default:
                    return e;   // no (further) equality operator
            }
            nextToken();
            auto e2 = cparseRelationalExp();
            e = new AST.EqualExp(op, loc, e, e2);
        }
    }

    /**************
     * C11 6.5.10
     * AND-expression
     *    equality-expression
     *    AND-expression & equality-expression
     */
    private AST.Expression cparseAndExp()
    {
        Loc loc = token.loc;
        auto e = cparseEqualityExp();
        while (token.value == TOK.and)
        {
            nextToken();
            auto e2 = cparseEqualityExp();
            e = new AST.AndExp(loc, e, e2);
            loc = token.loc;
        }
        return e;
    }

    /**************
     * C11 6.5.11
     * exclusive-OR-expression
     *    AND-expression
     *    exclusive-OR-expression ^ AND-expression
     */
    private AST.Expression cparseXorExp()
    {
        const loc = token.loc;

        auto e = cparseAndExp();
        while (token.value == TOK.xor)
        {
            nextToken();
            auto e2 = cparseAndExp();
            e = new AST.XorExp(loc, e, e2);
        }
        return e;
    }

    /**************
     * C11 6.5.12
     * inclusive-OR-expression
     *    exclusive-OR-expression
     *    inclusive-OR-expression | exclusive-OR-expression
     */
    private AST.Expression cparseOrExp()
    {
        const loc = token.loc;

        auto e = cparseXorExp();
        while (token.value == TOK.or)
        {
            nextToken();
            auto e2 = cparseXorExp();
            e = new AST.OrExp(loc, e, e2);
        }
        return e;
    }

    /**************
     * C11 6.5.13
     * logical-AND-expression
     *    inclusive-OR-expression
     *    logical-AND-expression && inclusive-OR-expression
     */
    private AST.Expression cparseAndAndExp()
    {
        const loc = token.loc;

        auto e = cparseOrExp();
        while (token.value == TOK.andAnd)
        {
            nextToken();
            auto e2 = cparseOrExp();
            e = new AST.LogicalExp(loc, EXP.andAnd, e, e2);
        }
        return e;
    }

    /**************
     * C11 6.5.14
     * logical-OR-expression
     *    logical-AND-expression
     *    logical-OR-expression || logical-AND-expression
     */
    private AST.Expression cparseOrOrExp()
    {
        const loc = token.loc;

        auto e = cparseAndAndExp();
        while (token.value == TOK.orOr)
        {
            nextToken();
            auto e2 = cparseAndAndExp();
            e = new AST.LogicalExp(loc, EXP.orOr, e, e2);
        }
        return e;
    }

    /**************
     * C11 6.5.15
     * conditional-expression:
     *    logical-OR-expression
     *    logical-OR-expression ?
expression : conditional-expression
     *
     * Returns:
     *      the logical-OR-expression, or a `CondExp` when a `?` follows it
     */
    private AST.Expression cparseCondExp()
    {
        const loc = token.loc;

        auto cond = cparseOrOrExp();
        if (token.value != TOK.question)
            return cond;

        nextToken();
        auto whenTrue = cparseExpression();
        check(TOK.colon);
        auto whenFalse = cparseCondExp();
        return new AST.CondExp(loc, cond, whenTrue, whenFalse);
    }

    /**************
     * C11 6.5.16
     * assignment-expression:
     *    conditional-expression
     *    unary-expression assignment-operator assignment-expression
     *
     * assignment-operator:
     *    = *= /= %= += -= <<= >>= &= ^= |=
     *
     * The right operand is parsed recursively, which makes assignment
     * right-associative.
     */
    AST.Expression cparseAssignExp()
    {
        AST.Expression e = cparseCondExp(); // constrain it to being unary-expression in semantic pass
        if (e is null)
            return e;

        const loc = token.loc;
        const op = token.value;

        // Is the current token an assignment-operator at all?
        switch (op)
        {
            case TOK.assign:
            case TOK.addAssign:
            case TOK.minAssign:
            case TOK.mulAssign:
            case TOK.divAssign:
            case TOK.modAssign:
            case TOK.andAssign:
            case TOK.orAssign:
            case TOK.xorAssign:
            case TOK.leftShiftAssign:
            case TOK.rightShiftAssign:
                break;

            default:
                return e;   // plain conditional-expression
        }

        nextToken();
        auto rhs = cparseAssignExp();

        // Build the node matching the operator seen above
        switch (op)
        {
            case TOK.assign:            e = new AST.AssignExp(loc, e, rhs);     break;
            case TOK.addAssign:         e = new AST.AddAssignExp(loc, e, rhs);  break;
            case TOK.minAssign:         e = new AST.MinAssignExp(loc, e, rhs);  break;
            case TOK.mulAssign:         e = new AST.MulAssignExp(loc, e, rhs);  break;
            case TOK.divAssign:         e = new AST.DivAssignExp(loc, e, rhs);  break;
            case TOK.modAssign:         e = new AST.ModAssignExp(loc, e, rhs);  break;
            case TOK.andAssign:         e = new AST.AndAssignExp(loc, e, rhs);  break;
            case TOK.orAssign:          e = new AST.OrAssignExp(loc, e, rhs);   break;
            case TOK.xorAssign:         e = new AST.XorAssignExp(loc, e, rhs);  break;
            case TOK.leftShiftAssign:   e = new AST.ShlAssignExp(loc, e, rhs);  break;
            case TOK.rightShiftAssign:  e = new AST.ShrAssignExp(loc, e, rhs);  break;

            default:
                assert(0);  // filtered out by the first switch
        }

        return e;
    }

    /***********************
     * C11 6.5.1.1
     * _Generic ( assignment-expression,
generic-assoc-list )
     *
     * generic-assoc-list:
     *    generic-association
     *    generic-assoc-list generic-association
     *
     * generic-association:
     *    type-name : assignment-expression
     *    default : assignment-expression
     *
     * Lexer is on `_Generic`.
     * Returns:
     *      a `GenericExp` holding the controlling expression and two parallel
     *      arrays; a `default` association is recorded with a null type
     */
    private AST.Expression cparseGenericSelection()
    {
        const loc = token.loc;
        nextToken();
        check(TOK.leftParenthesis);
        auto cntlExp = cparseAssignExp();
        check(TOK.comma);
        auto types = new AST.Types();
        auto exps = new AST.Expressions();
        bool sawDefault;
        while (1)
        {
            AST.Type t;
            if (token.value == TOK.default_)
            {
                nextToken();
                if (sawDefault)
                    error("only one `default` allowed in generic-assoc-list");
                sawDefault = true;
                t = null;
            }
            else
                t = cparseTypeName();
            types.push(t);

            check(TOK.colon);
            auto e = cparseAssignExp();
            exps.push(e);
            // stop at `)`; also stop at end of file so truncated input cannot loop forever
            if (token.value == TOK.rightParenthesis || token.value == TOK.endOfFile)
                break;
            check(TOK.comma);
        }
        check(TOK.rightParenthesis);
        return new AST.GenericExp(loc, cntlExp, types, exps);
    }

    /***********************
     * C11 6.6 Constant expressions
     * constant-expression:
     *    conditional-expression
     *
     * NOTE(review): this delegates to cparseAssignExp(), which accepts more than a
     * conditional-expression; presumably the restriction is enforced later - confirm.
     */
    private AST.Expression cparseConstantExp()
    {
        return cparseAssignExp();
    }

    /*****************************
     * gcc extension:
     *    type __builtin_va_arg(assign-expression, type)
     * Rewrite as `va_arg` template from `core.stdc.stdarg`:
     *    va_arg!(type)(assign-expression);
     * Lexer is on `__builtin_va_arg`
     *
     * Returns:
     *      a `CallExp` on the `va_arg` template instance; all created nodes carry
     *      the location of the `__builtin_va_arg` token
     */
    private AST.Expression cparseBuiltin_va_arg()
    {
        // Capture the location before any token is consumed, like the other
        // expression parsers do, so diagnostics point at the builtin itself.
        const loc = token.loc;

        importBuiltins = true;  // need core.stdc.stdarg

        nextToken();
        check(TOK.leftParenthesis);

        auto arguments = new AST.Expressions();
        auto arg = cparseAssignExp();
        arguments.push(arg);

        check(TOK.comma);

        auto t = cparseTypeName();
        auto tiargs = new AST.Objects();
        tiargs.push(t);

        auto ti = new AST.TemplateInstance(loc, Id.va_arg, tiargs);
        auto tie = new AST.ScopeExp(loc, ti);

        AST.Expression e = new AST.CallExp(loc, tie, arguments);

        check(TOK.rightParenthesis);
        return e;
    }

    //}
    /********************************************************************************/
    /********************************* Declaration Parser
***************************/
    //{

    /*************************************
     * C11 6.7
     * declaration:
     *    declaration-specifiers init-declarator-list (opt) ;
     *    static_assert-declaration
     *
     * init-declarator-list:
     *    init-declarator
     *    init-declarator-list , init-declarator
     *
     * init-declarator:
     *    declarator
     *    declarator = initializer
     *
     * Also handles the `__pragma` and `__import` extensions, and hands off to
     * cparseFunctionDefinition() when the first declarator turns out to start a
     * function definition. Resulting symbols are pushed onto `symbols`.
     *
     * Params:
     *    level = declaration context
     */
    void cparseDeclaration(LVL level)
    {
        //printf("cparseDeclaration(level = %d)\n", level);
        if (token.value == TOK._Static_assert)
        {
            auto s = cparseStaticAssert();
            symbols.push(s);
            return;
        }

        if (token.value == TOK.__pragma)
        {
            uupragmaDirective(scanloc);
            return;
        }

        if (token.value == TOK._import) // import declaration extension
        {
            auto a = parseImport();
            if (a && a.length)
                symbols.append(a);
            return;
        }

        // Saved so the typedef table and symbol table can be restored after the
        // declarator (whose parameter list may have replaced `symbols`) is parsed
        const typedefTabLengthSave = typedefTab.length;
        auto symbolsSave = symbols;
        Specifier specifier;
        specifier.packalign = this.packalign;
        auto tspec = cparseDeclarationSpecifiers(level, specifier);

        /* If a declarator does not follow, it is unnamed
         */
        if (token.value == TOK.semicolon)
        {
            if (!tspec)
            {
                nextToken();
                return;         // accept empty declaration as an extension
            }

            if (auto ti = tspec.isTypeIdentifier())
            {
                // C11 6.7.2-2
                error("type-specifier missing for declaration of `%s`", ti.ident.toChars());
                nextToken();
                return;
            }

            nextToken();
            auto tt = tspec.isTypeTag();
            if (!tt ||
                !tt.id && (tt.tok == TOK.struct_ || tt.tok == TOK.union_))
                return; // legal but meaningless empty declaration, ignore it

            /* `struct tag;` and `struct tag { ... };`
             * always result in a declaration in the current scope
             */
            auto stag = (tt.tok == TOK.struct_) ? new AST.StructDeclaration(tt.loc, tt.id, false) :
                        (tt.tok == TOK.union_)  ? new AST.UnionDeclaration(tt.loc, tt.id) :
                                                  new AST.EnumDeclaration(tt.loc, tt.id, tt.base);
            stag.members = tt.members;
            if (!symbols)
                symbols = new AST.Dsymbols();
            auto stags = applySpecifier(stag, specifier);
            symbols.push(stags);

            if (0 && tt.tok == TOK.enum_)    // C11 proscribes enums with no members, but we allow it
            {
                if (!tt.members)
                    error(tt.loc, "`enum %s` has no members", stag.toChars());
            }
            return;
        }

        if (!tspec)
        {
            error("no type for declarator before `%s`", token.toChars());
            panic();
            nextToken();
            return;
        }

        if (tspec && specifier.mod & MOD.xconst)
        {
            tspec = toConst(tspec);
            // NOTE(review): if MOD.xnone is 0 this mask clears nothing, whereas
            // cparseTypeName() assigns MOD.xnone outright - confirm which is intended
            specifier.mod &= ~MOD.xnone;          // 'used' it
        }

        // Error recovery: skip to just past the next `;` (or stop at end of file)
        void scanPastSemicolon()
        {
            while (token.value != TOK.semicolon && token.value != TOK.endOfFile)
                nextToken();
            nextToken();
        }

        if (token.value == TOK.assign && tspec && tspec.isTypeIdentifier())
        {
            /* C11 6.7.2-2
             * Special check for `const b = 1;` because some compilers allow it
             */
            error("type-specifier omitted for declaration of `%s`", tspec.isTypeIdentifier().ident.toChars());
            return scanPastSemicolon();
        }

        bool first = true;      // true while a function definition is still possible
        while (1)
        {
            Identifier id;
            AST.StringExp asmName;
            auto dt = cparseDeclarator(DTR.xdirect, tspec, id, specifier);
            if (!dt)
            {
                panic();
                nextToken();
                break;          // error recovery
            }

            /* GNU Extensions
             * init-declarator:
             *    declarator simple-asm-expr (opt) gnu-attributes (opt)
             *    declarator simple-asm-expr (opt) gnu-attributes (opt) = initializer
             */
            switch (token.value)
            {
                case TOK.assign:
                case TOK.comma:
                case TOK.semicolon:
                case TOK.asm_:
                case TOK.__attribute__:
                    if (token.value == TOK.asm_)
                        asmName = cparseSimpleAsmExpr();
                    if (token.value == TOK.__attribute__)
                    {
                        cparseGnuAttributes(specifier);
                        if (token.value == TOK.leftCurly)
                            break;              // function definition
                    }
                    /* This is a data definition, there cannot now be a
                     * function definition.
                     */
                    first = false;
                    break;

                default:
                    break;
            }

            if (specifier.alignExps && dt.isTypeFunction())
                error("no alignment-specifier for function declaration"); // C11 6.7.5-2
            if (specifier.alignExps && specifier.scw == SCW.xregister)
                error("no alignment-specifier for `register` storage class"); // C11 6.7.5-2

            /* C11 6.9.1 Function Definitions
             * function-definition:
             *   declaration-specifiers declarator declaration-list (opt) compound-statement
             *
             * declaration-list:
             *    declaration
             *    declaration-list declaration
             */
            auto t = &token;
            if (first &&                   // first declarator
                id &&
                dt.isTypeFunction() &&     // function type not inherited from a typedef
                isDeclarationList(t) &&    // optional declaration-list
                level == LVL.global &&     // function definitions only at global scope
                t.value == TOK.leftCurly)  // start of compound-statement
            {
                auto s = cparseFunctionDefinition(id, dt.isTypeFunction(), specifier);
                typedefTab.setDim(typedefTabLengthSave);
                symbols = symbolsSave;
                symbols.push(s);
                return;
            }
            AST.Dsymbol s = null;
            typedefTab.setDim(typedefTabLengthSave);
            symbols = symbolsSave;
            if (!symbols)
                symbols = new AST.Dsymbols;     // lazily create it

            if (level != LVL.global && !tspec && !specifier.scw && !specifier.mod)
                error("declaration-specifier-seq required");
            else if (specifier.scw == SCW.xtypedef)
            {
                if (token.value == TOK.assign)
                    error("no initializer for typedef declaration");
                if (specifier.alignExps)
                    error("no alignment-specifier for typedef declaration"); // C11 6.7.5-2

                bool isalias = true;
                if (auto ts = dt.isTypeStruct())
                {
                    if (ts.sym.isAnonymous())
                    {
                        // This is a typedef for an anonymous struct-or-union.
                        // Directly set the ident for the struct-or-union.
                        ts.sym.ident = id;
                        isalias = false;
                    }
                }
                else if (auto te = dt.isTypeEnum())
                {
                    if (te.sym.isAnonymous())
                    {
                        // This is a typedef for an anonymous enum.
                        te.sym.ident = id;
                        isalias = false;
                    }
                }
                else if (auto tt = dt.isTypeTag())
                {
                    if (tt.id || tt.tok == TOK.enum_)
                    {
                        /* `struct tag;` and `struct tag { ... };`
                         * always result in a declaration in the current scope
                         */
                        auto stag = (tt.tok == TOK.struct_) ? new AST.StructDeclaration(tt.loc, tt.id, false) :
                                    (tt.tok == TOK.union_)  ? new AST.UnionDeclaration(tt.loc, tt.id) :
                                                              new AST.EnumDeclaration(tt.loc, tt.id, tt.base);
                        stag.members = tt.members;
                        tt.members = null;
                        if (!symbols)
                            symbols = new AST.Dsymbols();
                        symbols.push(stag);
                        if (tt.tok == TOK.enum_)
                        {
                            isalias = false;
                            s = new AST.AliasDeclaration(token.loc, id, stag);
                        }
                    }
                }
                if (isalias)
                    s = new AST.AliasDeclaration(token.loc, id, dt);
                insertTypedefToTypedefTab(id, dt);       // remember typedefs
            }
            else if (id)
            {
                if (level == LVL.prototype)
                    break;      // declared later as Parameter, not VarDeclaration

                if (dt.ty == AST.Tvoid)
                    error("`void` has no value");

                AST.Initializer initializer;
                bool hasInitializer;
                if (token.value == TOK.assign)
                {
                    nextToken();
                    hasInitializer = true;
                    initializer = cparseInitializer();
                }
                // declare the symbol
                assert(id);

                if (isFunctionTypedef(dt))
                {
                    if (hasInitializer)
                        error("no initializer for function declaration");
                    if (specifier.scw & SCW.x_Thread_local)
                        error("functions cannot be `_Thread_local`"); // C11 6.7.1-4
                    auto fd = new AST.FuncDeclaration(token.loc, Loc.initial, id, specifiersToSTC(level, specifier), dt, specifier.noreturn);
                    s = fd;
                }
                else
                {
                    // Give non-extern variables an implicit void initializer
                    // if one has not been explicitly set.
                    if (!hasInitializer &&
                        !(specifier.scw & (SCW.xextern | SCW.xstatic | SCW.x_Thread_local) || level == LVL.global))
                        initializer = new AST.VoidInitializer(token.loc);
                    s = new AST.VarDeclaration(token.loc, dt, id, initializer, specifiersToSTC(level, specifier));
                }
                if (level != LVL.global)
                    insertIdToTypedefTab(id);   // non-typedef declarations can hide typedefs in outer scopes
            }
            if (s !is null)
            {
                // Saw `asm("name")` in the function, type, or variable definition.
                // This is equivalent to `pragma(mangle, "name")` in D
                if (asmName)
                {
                    /*
                        https://issues.dlang.org/show_bug.cgi?id=23012
                        Ideally this would be translated to a pragma(mangle)
                        decl. This is not possible because ImportC symbols are
                        (currently) merged before semantic analysis is performed,
                        so the pragma(mangle) never effects any change on the declarations
                        it pertains to.

                        Writing to mangleOverride directly avoids this, and is possible
                        because in C only a StringExp is allowed unlike a full fat pragma(mangle)
                        which is more liberal.
                    */
                    if (auto p = s.isDeclaration())
                    {
                        auto str = asmName.peekString();
                        p.mangleOverride = str;
//                      p.adFlags |= AST.VarDeclaration.nounderscore;
                        p.adFlags |= 4; // cannot get above line to compile on Ubuntu
                    }
                }
                s = applySpecifier(s, specifier);
                if (level == LVL.local)
                {
                    // Wrap the declaration in `extern (C) { declaration }`
                    // Necessary for function pointers, but harmless to apply to all.
                    auto decls = new AST.Dsymbols(1);
                    (*decls)[0] = s;
                    s = new AST.LinkDeclaration(s.loc, linkage, decls);
                }
                symbols.push(s);
            }
            first = false;

            switch (token.value)
            {
                case TOK.identifier:
                    if (s)
                    {
                        error("missing comma or semicolon after declaration of `%s`, found `%s` instead", s.toChars(), token.toChars());
                        goto Lend;
                    }
                    goto default;

                case TOK.semicolon:
                    nextToken();
                    return;

                case TOK.comma:
                    if (!symbolsSave)
                        symbolsSave = symbols;
                    nextToken();
                    break;

                default:
                    error("`=`, `;` or `,` expected to end declaration instead of `%s`", token.toChars());
                Lend:
                    return scanPastSemicolon();
            }
        }
    }

    /***************************************
     * C11 Function Definitions
     * function-definition
     *    declaration-specifiers declarator declaration-list (opt) compound-statement
     *
     * declaration-list:
     *    declaration
     *    declaration-list declaration
     *
     * It's already been parsed up to the declaration-list (opt).
     * Pick it up from there.
* Params:
     *    id = function identifier
     *    ft = function type
     *    specifier = function specifiers
     * Returns:
     *  Dsymbol for the function
     */
    AST.Dsymbol cparseFunctionDefinition(Identifier id, AST.TypeFunction ft, ref Specifier specifier)
    {
        /* Start function scope
         */
        typedefTab.push(null);

        if (token.value != TOK.leftCurly)       // if not start of a compound-statement
        {
            // Do declaration-list
            // Also stop at end of file: a malformed declaration-list could otherwise
            // keep this loop running without ever reaching a `{`
            do
            {
                cparseDeclaration(LVL.parameter);
            } while (token.value != TOK.leftCurly && token.value != TOK.endOfFile);

            /* Since there were declarations, the parameter-list must have been
             * an identifier-list.
             */
            ft.parameterList.hasIdentifierList = true;        // semantic needs to know to adjust parameter types
            auto pl = ft.parameterList;
            if (pl.varargs != AST.VarArg.none && pl.length)
                error("function identifier-list cannot end with `...`");
            ft.parameterList.varargs = AST.VarArg.variadic;     // but C11 allows extra arguments
            importBuiltins = true;                              // will need __va_list_tag
            auto plLength = pl.length;

            // `symbols` is only created lazily, so it can still be null when the
            // declaration-list produced no symbols; treat that as zero declarations
            const nsymbols = symbols ? symbols.length : 0;
            if (nsymbols != plLength)
                error("%d identifiers does not match %d declarations", cast(int)plLength, cast(int)nsymbols);

            /* Transfer the types and storage classes from symbols[] to pl[]
             */
            foreach (i; 0 .. plLength)
            {
                auto p = pl[i];  // yes, quadratic

                // Convert typedef-identifier to identifier
                if (p.type)
                {
                    if (auto t = p.type.isTypeIdentifier())
                    {
                        p.ident = t.ident;
                        p.type = null;
                    }
                }

                if (p.type || !(p.storageClass & STC.parameter))
                    error("storage class and type are not allowed in identifier-list");
                if (symbols)
                {
                    foreach (s; (*symbols)[]) // yes, quadratic
                    {
                        auto d = s.isDeclaration();
                        if (d && p.ident == d.ident && d.type)
                        {
                            p.type = d.type;
                            p.storageClass = d.storage_class;
                            d.type = null; // don't reuse
                            break;
                        }
                    }
                }
                if (!p.type)
                {
                    error("no declaration for identifier `%s`", p.ident.toChars());
                    p.type = AST.Type.terror;
                }
            }
        }

        addFuncName = false;    // gets set to true if somebody references __func__ in this function
        const locFunc = token.loc;

        auto body = cparseStatement(ParseStatementFlags.curly);  // don't start a new scope; continue with parameter scope
        typedefTab.pop();                                        // end of function scope

        auto fd = new AST.FuncDeclaration(locFunc, prevloc, id, specifiersToSTC(LVL.global, specifier), ft, specifier.noreturn);

        if (addFuncName)
        {
            // prepend the declaration made by createFuncName() to the function body
            auto s = createFuncName(locFunc, id);
            body = new AST.CompoundStatement(locFunc, s, body);
        }
        fd.fbody = body;

        // TODO add `symbols` to the function's local symbol table `sc2` in FuncDeclaration::semantic3()

        return fd;
    }

    /***************************************
     * C11 Initialization
     * initializer:
     *    assignment-expression
     *    { initializer-list }
     *    { initializer-list , }
     *
     * initializer-list:
     *    designation (opt) initializer
     *    initializer-list , designation (opt) initializer
     *
     * designation:
     *    designator-list =
     *
     * designator-list:
     *    designator
     *    designator-list designator
     *
     * designator:
     *    [ constant-expression ]
     *    .
identifier
     * Returns:
     *    initializer
     */
    AST.Initializer cparseInitializer()
    {
        if (token.value != TOK.leftCurly)
        {
            auto ae = cparseAssignExp();        // assignment-expression
            return new AST.ExpInitializer(token.loc, ae);
        }
        nextToken();
        const loc = token.loc;

        /* Collect one or more `designation (opt) initializer`
         * into ci.initializerList, but lazily create ci
         */
        AST.CInitializer ci;
        while (1)
        {
            /* There can be 0 or more designators preceding an initializer.
             * Collect them in desigInit
             */
            AST.DesigInit desigInit;
            while (1)
            {
                if (token.value == TOK.leftBracket)     // [ constant-expression ]
                {
                    nextToken();
                    auto e = cparseConstantExp();
                    check(TOK.rightBracket);
                    if (!desigInit.designatorList)
                        desigInit.designatorList = new AST.Designators;
                    desigInit.designatorList.push(AST.Designator(e));
                }
                else if (token.value == TOK.dot)        // . identifier
                {
                    nextToken();
                    if (token.value != TOK.identifier)
                    {
                        error("identifier expected following `.` designator");
                        break;
                    }
                    if (!desigInit.designatorList)
                        desigInit.designatorList = new AST.Designators;
                    desigInit.designatorList.push(AST.Designator(token.ident));
                    nextToken();
                }
                else
                {
                    // a designator-list must be followed by `=`
                    if (desigInit.designatorList)
                        check(TOK.assign);
                    break;
                }
            }

            desigInit.initializer = cparseInitializer();
            if (!ci)
                ci = new AST.CInitializer(loc);
            ci.initializerList.push(desigInit);
            if (token.value == TOK.comma)
            {
                nextToken();
                if (token.value != TOK.rightCurly)  // a trailing comma before `}` is allowed
                    continue;
            }
            break;
        }
        check(TOK.rightCurly);
        //printf("ci: %s\n", ci.toChars());
        return ci;
    }

    /*************************************
     * C11 6.7
     * declaration-specifier:
     *    storage-class-specifier declaration-specifiers (opt)
     *    type-specifier declaration-specifiers (opt)
     *    type-qualifier declaration-specifiers (opt)
     *    function-specifier declaration-specifiers (opt)
     *    alignment-specifier declaration-specifiers (opt)
     * Params:
     *  level = declaration context
     *  specifier = specifiers in and out
     * Returns:
     *  resulting type, null if not specified
     */
    private AST.Type cparseDeclarationSpecifiers(LVL level, ref Specifier specifier)
    {
        // Bit flags recording which type-specifier keywords have been seen
        enum TKW : uint
        {
            xnone      = 0,
            xchar      = 1,
            xsigned    = 2,
            xunsigned  = 4,
            xshort     = 8,
            xint       = 0x10,
            xlong      = 0x20,
            xllong     = 0x40,
            xfloat     = 0x80,
            xdouble    = 0x100,
            xldouble   = 0x200,
            xtag       = 0x400,
            xident     = 0x800,
            xvoid      = 0x1000,
            xbool      = 0x4000,
            ximaginary = 0x8000,
            xcomplex   = 0x10000,
            x_Atomic   = 0x20000,
        }

        AST.Type t;
        Loc loc;    // never assigned below, so nodes built with it get the default Loc
        //printf("parseDeclarationSpecifiers()\n");

        TKW tkw;
        SCW scw = specifier.scw & SCW.xtypedef;
        MOD mod;
        Identifier id;
        Identifier previd;

    Lwhile:
        while (1)
        {
            //printf("token %s\n", token.toChars());
            TKW tkwx;
            SCW scwx;
            MOD modx;
            switch (token.value)
            {
                // Storage class specifiers
                case TOK.static_:    scwx = SCW.xstatic;    break;
                case TOK.extern_:    scwx = SCW.xextern;    break;
                case TOK.auto_:      scwx = SCW.xauto;      break;
                case TOK.register:   scwx = SCW.xregister;  break;
                case TOK.typedef_:   scwx = SCW.xtypedef;   break;
                case TOK.inline:     scwx = SCW.xinline;    break;
                case TOK._Noreturn:  scwx = SCW.x_Noreturn; break;
                case TOK._Thread_local: scwx = SCW.x_Thread_local; break;

                // Type qualifiers
                case TOK.const_:     modx = MOD.xconst;     break;
                case TOK.volatile:   modx = MOD.xvolatile;  break;
                case TOK.restrict:   modx = MOD.xrestrict;  break;
                case TOK.__stdcall:  modx = MOD.x__stdcall; break;

                // Type specifiers
                case TOK.char_:      tkwx = TKW.xchar;      break;
                case TOK.signed:     tkwx = TKW.xsigned;    break;
                case TOK.unsigned:   tkwx = TKW.xunsigned;  break;
                case TOK.int16:      tkwx = TKW.xshort;     break;
                case TOK.int32:      tkwx = TKW.xint;       break;
                case TOK.int64:      tkwx = TKW.xlong;      break;
                case TOK.float32:    tkwx = TKW.xfloat;     break;
                case TOK.float64:    tkwx = TKW.xdouble;    break;
                case TOK.void_:      tkwx = TKW.xvoid;      break;
                case TOK._Bool:      tkwx = TKW.xbool;      break;
                case TOK._Imaginary: tkwx = TKW.ximaginary; break;
                case TOK._Complex:   tkwx = TKW.xcomplex;   break;

                case TOK.identifier:
                    tkwx = TKW.xident;
                    id = token.ident;
                    break;

                case TOK.struct_:
                case TOK.union_:
                {
                    const structOrUnion = token.value;
                    const sloc = token.loc;
                    nextToken();

                    /* GNU Extensions
                     * struct-or-union-specifier:
                     *    struct-or-union gnu-attributes (opt) identifier (opt) { struct-declaration-list } gnu-attributes (opt)
                     *    struct-or-union gnu-attribute (opt) identifier
                     */
                    if (token.value == TOK.__attribute__)
                        cparseGnuAttributes(specifier);

                    t = cparseStruct(sloc, structOrUnion, symbols);
                    tkwx = TKW.xtag;
                    break;
                }

                case TOK.enum_:
                    t = cparseEnum(symbols);
                    tkwx = TKW.xtag;
                    break;

                case TOK._Atomic:
                {
                    // C11 6.7.2.4
                    // type-specifier if followed by `( type-name )`
                    auto tk = peek(&token);
                    if (tk.value == TOK.leftParenthesis)
                    {
                        tk = peek(tk);
                        if (isTypeName(tk) && tk.value == TOK.rightParenthesis)
                        {
                            nextToken();    // move past `_Atomic`
                            nextToken();    // move past `(` so the type-name parser starts on the type
                            t = cparseTypeName();
                            // TODO - implement the "atomic" part of t
                            tkwx = TKW.x_Atomic;    // the closing `)` is consumed by the tkwx handling below
                            break;
                        }
                    }
                    // C11 6.7.3 type-qualifier if not
                    modx = MOD.x_Atomic;
                    break;
                }

                case TOK._Alignas:
                {
                    /* C11 6.7.5
                     * _Alignas ( type-name )
                     * _Alignas ( constant-expression )
                     */

                    if (level & (LVL.parameter | LVL.prototype))
                        error("no alignment-specifier for parameters"); // C11 6.7.5-2

                    nextToken();
                    check(TOK.leftParenthesis);
                    AST.Expression exp;
                    auto tk = &token;
                    if (isTypeName(tk))  // _Alignas ( type-name )
                    {
                        auto talign = cparseTypeName();
                        /* Convert type to expression: `talign.alignof`
                         */
                        auto e = new AST.TypeExp(loc, talign);
                        exp = new AST.DotIdExp(loc, e, Id.__xalignof);
                    }
                    else  // _Alignas ( constant-expression )
                    {
                        exp = cparseConstantExp();
                    }

                    if (!specifier.alignExps)
                        specifier.alignExps = new AST.Expressions(0);
                    specifier.alignExps.push(exp);

                    check(TOK.rightParenthesis);
                    break;
                }

                case TOK.__attribute__:
                {
                    /* GNU Extensions
                     * declaration-specifiers:
                     *    gnu-attributes declaration-specifiers (opt)
                     */
                    cparseGnuAttributes(specifier);
                    break;
                }

                case TOK.__declspec:
                {
                    /* Microsoft extension
                     */
                    cparseDeclspec(specifier);
                    break;
                }

                case TOK.typeof_:
                {
                    nextToken();
                    check(TOK.leftParenthesis);

                    auto tk = &token;
                    AST.Expression e;
                    if (isTypeName(tk))
                        e = new AST.TypeExp(loc, cparseTypeName());
                    else
                        e = cparseExpression();
                    t = new AST.TypeTypeof(loc, e);

                    if (token.value == TOK.rightParenthesis)
                        nextToken();
                    else
                    {
                        t = AST.Type.terror;
                        error("`typeof` operator expects an expression or type name in parentheses");

                        // skipParens et. al expect to be on the opening parenthesis
                        int parens;
                        loop: while (1)
                        {
                            switch (token.value)
                            {
                                case TOK.leftParenthesis:
                                    parens++;
                                    break;
                                case TOK.rightParenthesis:
                                    parens--;
                                    if (parens < 0)
                                        goto case;  // unbalanced `)`: leave the loop via the next case
                                    break;
                                case TOK.endOfFile:
                                    break loop;
                                default:
                            }
                            nextToken();
                        }
                    }

                    tkwx = TKW.xtag;
                    break;
                }

                default:
                    break Lwhile;
            }

            if (tkwx)
            {
                if (tkw & TKW.xlong && tkwx & TKW.xlong)
                {
                    // second `long` turns `long` into `long long`
                    tkw &= ~TKW.xlong;
                    tkwx = TKW.xllong;
                }
                if (tkw && tkwx & TKW.xident)
                {
                    // 2nd identifier can't be a typedef
                    break Lwhile; // leave parser on the identifier for the following declarator
                }
                else if (tkwx & TKW.xident)
                {
                    // 1st identifier, save it for TypeIdentifier
                    previd = id;
                }
                if (tkw & TKW.xident && tkwx ||  // typedef-name followed by type-specifier
                    tkw & tkwx)                  // duplicate type-specifiers
                {
                    error("illegal combination of type specifiers");
                    tkwx = TKW.init;
                }
                tkw |= tkwx;
                if (!(tkwx & TKW.xtag)) // if parser already advanced
                    nextToken();
                continue;
            }

            if (modx)
            {
                mod |= modx;
                nextToken();
                continue;
            }

            if (scwx)
            {
                if (scw & scwx)
                    error("duplicate storage class");
                scw |= scwx;
                // C11 6.7.1-2 At most one storage-class may be given, except that
                // _Thread_local may appear with static or extern.
                const scw2 = scw & (SCW.xstatic | SCW.xextern | SCW.xauto | SCW.xregister | SCW.xtypedef);
                if (scw2 & (scw2 - 1) ||    // more than one bit set
                    scw & (SCW.x_Thread_local) && scw & (SCW.xauto | SCW.xregister | SCW.xtypedef))
                {
                    error("multiple storage classes in declaration specifiers");
                    scw &= ~scwx;
                }
                if (level == LVL.local &&
                    scw & (SCW.x_Thread_local) && scw & (SCW.xinline | SCW.x_Noreturn))
                {
                    error("`inline` and `_Noreturn` function specifiers not allowed for `_Thread_local`");
                    scw &= ~scwx;
                }
                if (level & (LVL.parameter | LVL.prototype) &&
                    scw & ~SCW.xregister)
                {
                    error("only `register` storage class allowed for function parameters");
                    scw &= ~scwx;
                }
                if (level == LVL.global &&
                    scw & (SCW.xauto | SCW.xregister))
                {
                    error("`auto` and `register` storage class not allowed for global");
                    scw &= ~scwx;
                }
                nextToken();
                continue;
            }
        }

        specifier.scw = scw;
        specifier.mod = mod;

        // Convert TKW bits to type t
        switch (tkw)
        {
            case TKW.xnone:                     t = null; break;

            case TKW.xchar:                     t = AST.Type.tchar; break;
            case TKW.xsigned | TKW.xchar:       t = AST.Type.tint8; break;
            case TKW.xunsigned | TKW.xchar:     t = AST.Type.tuns8; break;

            case TKW.xshort:
            case TKW.xsigned | TKW.xshort:
            case TKW.xsigned | TKW.xshort | TKW.xint:
            case TKW.xshort | TKW.xint:         t = integerTypeForSize(shortsize); break;

            case TKW.xunsigned | TKW.xshort | TKW.xint:
            case TKW.xunsigned | TKW.xshort:    t = unsignedTypeForSize(shortsize); break;

            case TKW.xint:
            case TKW.xsigned:
            case TKW.xsigned | TKW.xint:        t = integerTypeForSize(intsize); break;

            case TKW.xunsigned:
            case TKW.xunsigned | TKW.xint:      t = unsignedTypeForSize(intsize); break;

            case TKW.xlong:
            case TKW.xsigned | TKW.xlong:
            case TKW.xsigned | TKW.xlong | TKW.xint:
            case TKW.xlong | TKW.xint:          t = integerTypeForSize(longsize); break;

            case TKW.xunsigned | TKW.xlong | TKW.xint:
            case TKW.xunsigned | TKW.xlong:     t = unsignedTypeForSize(longsize); break;

            case TKW.xllong:
            case TKW.xsigned | TKW.xllong:
            case TKW.xsigned | TKW.xllong | TKW.xint:
            case TKW.xllong | TKW.xint:         t = integerTypeForSize(long_longsize); break;

            case TKW.xunsigned | TKW.xllong | TKW.xint:
            case TKW.xunsigned | TKW.xllong:    t = unsignedTypeForSize(long_longsize); break;

            case TKW.xvoid:                     t = AST.Type.tvoid; break;
            case TKW.xbool:                     t = boolsize == 1 ? AST.Type.tbool : integerTypeForSize(boolsize); break;

            case TKW.xfloat:                    t = AST.Type.tfloat32; break;
            case TKW.xdouble:                   t = AST.Type.tfloat64; break;
            case TKW.xlong | TKW.xdouble:       t = realType(RTFlags.realfloat); break;

            case TKW.ximaginary | TKW.xfloat:              t = AST.Type.timaginary32; break;
            case TKW.ximaginary | TKW.xdouble:             t = AST.Type.timaginary64; break;
            case TKW.ximaginary | TKW.xlong | TKW.xdouble: t = realType(RTFlags.imaginary); break;

            case TKW.xcomplex | TKW.xfloat:                t = AST.Type.tcomplex32; break;
            case TKW.xcomplex | TKW.xdouble:               t = AST.Type.tcomplex64; break;
            case TKW.xcomplex | TKW.xlong | TKW.xdouble:   t = realType(RTFlags.complex); break;

            case TKW.xident:
            {
                const idx = previd.toString();
                if (idx.length > 2 && idx[0] == '_' && idx[1] == '_')  // leading double underscore
                    importBuiltins = true;  // probably one of those compiler extensions
                t = null;

                /* Punch through to what the typedef is, to support things like:
                 *  typedef T* T;
                 */
                auto pt = lookupTypedef(previd);
                if (pt && *pt)      // if previd is a known typedef
                    t = *pt;

                if (!t)
                    t = new AST.TypeIdentifier(loc, previd);
                break;
            }

            case TKW.xtag:
            case TKW.x_Atomic:  // `_Atomic ( type-name )`: no atomic semantics applied yet
                break;          // t is already set

            default:
                error("illegal type combination");
                t = AST.Type.terror;
                break;
        }

        return t;
    }

    /********************************
     * C11 6.7.6
     * Parse a declarator (including function definitions).
* declarator:
     *    pointer (opt) direct-declarator
     *
     * direct-declarator :
     *    identifier
     *    ( declarator )
     *    direct-declarator [ type-qualifier-list (opt) assignment-expression (opt) ]
     *    direct-declarator [ static type-qualifier-list (opt) assignment-expression ]
     *    direct-declarator [ type-qualifier-list static assignment-expression (opt) ]
     *    direct-declarator [ type-qualifier-list (opt) * ]
     *    direct-declarator ( parameter-type-list )
     *    direct-declarator ( identifier-list (opt) )
     *
     * pointer :
     *    * type-qualifier-list (opt)
     *    * type-qualifier-list (opt) pointer
     *
     * type-qualifier-list :
     *    type-qualifier
     *    type-qualifier-list type-qualifier
     *
     * parameter-type-list :
     *    parameter-list
     *    parameter-list , ...
     *
     * parameter-list :
     *    parameter-declaration
     *    parameter-list , parameter-declaration
     *
     * parameter-declaration :
     *    declaration-specifiers declarator
     *    declaration-specifiers abstract-declarator (opt)
     *
     * identifier-list :
     *    identifier
     *    identifier-list , identifier
     *
     * Params:
     *  declarator   = declarator kind
     *  t            = base type to start with
     *  pident       = set to Identifier if there is one, null if not
     *  specifier    = specifiers in and out
     * Returns:
     *  type declared. If a TypeFunction is returned, this.symbols is the
     *  symbol table for the parameter-type-list, which will contain any
     *  declared struct, union or enum tags.
     */
    private AST.Type cparseDeclarator(DTR declarator, AST.Type t,
        out Identifier pident, ref Specifier specifier)
    {
        //printf("cparseDeclarator(%d, %p)\n", declarator, t);
        AST.Types constTypes; // all the Types that will need `const` applied to them

        // Parses one (possibly parenthesized) declarator on top of base type `t`
        AST.Type parseDecl(AST.Type t)
        {
            AST.Type ts;
            while (1)
            {
                switch (token.value)
                {
                case TOK.identifier:        // identifier
                    //printf("identifier %s\n", token.ident.toChars());
                    if (declarator == DTR.xabstract)
                        error("identifier not allowed in abstract-declarator");
                    pident = token.ident;
                    ts = t;
                    nextToken();
                    break;

                case TOK.leftParenthesis:   // ( declarator )
                    /* like: T (*fp)();
                     *       T ((*fp))();
                     */
                    nextToken();

                    if (token.value == TOK.__stdcall) // T (__stdcall*fp)();
                    {
                        specifier.mod |= MOD.x__stdcall;
                        nextToken();
                    }

                    ts = parseDecl(t);
                    check(TOK.rightParenthesis);
                    break;

                case TOK.mul:               // pointer
                    t = new AST.TypePointer(t);
                    nextToken();
                    // add post fixes const/volatile/restrict/_Atomic
                    const mod = cparseTypeQualifierList();
                    if (mod & MOD.xconst)
                        constTypes.push(t);
                    if (token.value == TOK.__attribute__)
                        cparseGnuAttributes(specifier);
                    continue;

                default:
                    if (declarator == DTR.xdirect)
                    {
                        if (!t || t.isTypeIdentifier())
                        {
                            // const arr[1];
                            error("no type-specifier for declarator");
                            t = AST.Type.tint32;
                        }
                        else
                            error("identifier or `(` expected"); // )
                        panic();
                    }
                    ts = t;
                    break;
                }
                break;
            }

            // parse DeclaratorSuffixes
            while (1)
            {
                /* Insert tx -> t into
                 *   ts -> ... -> t
                 * so that
                 *   ts -> ... -> tx -> t
                 */
                static void insertTx(ref AST.Type ts, AST.Type tx, AST.Type t)
                {
                    AST.Type* pt;
                    for (pt = &ts; *pt != t; pt = &(cast(AST.TypeNext)*pt).next)
                    {
                    }
                    *pt = tx;
                }

                switch (token.value)
                {
                    case TOK.leftBracket:
                    {
                        // post [] syntax, pick up any leading type qualifiers, `static` and `*`
                        AST.Type ta;
                        nextToken();

                        auto mod = cparseTypeQualifierList();   // const/volatile/restrict/_Atomic

                        bool isStatic;
                        bool isVLA;
                        if (token.value == TOK.static_)
                        {
                            isStatic = true;    // `static`
                            nextToken();
                            if (!mod)           // type qualifiers after `static`
                                mod = cparseTypeQualifierList();
                        }
                        else if (token.value == TOK.mul)
                        {
                            if (peekNext() == TOK.rightBracket)
                            {
                                isVLA = true;   // `*`
                                nextToken();
                            }
                        }

                        if (isStatic || token.value != TOK.rightBracket)
                        {
                            //printf("It's a static array\n");
                            AST.Expression e = cparseAssignExp(); // [ expression ]
                            ta = new AST.TypeSArray(t, e);
                        }
                        else
                        {
                            /* C11 6.7.6.2-4 An [ ] array is an incomplete array type
                             */
                            ta = new AST.TypeSArray(t);
                        }
                        check(TOK.rightBracket);

                        // Issue errors for unsupported types.
                        if (isVLA) // C11 6.7.6.2
                        {
                            error("variable length arrays are not supported");
                        }
                        if (isStatic) // C11 6.7.6.3
                        {
                            error("static array parameters are not supported");
                        }
                        if (declarator != DTR.xparameter)
                        {
                            /* C11 6.7.6.2-4: '*' can only be used with function prototype scope.
                             */
                            if (isVLA)
                                error("variable length array used outside of function prototype");
                            /* C11 6.7.6.2-1: type qualifiers and 'static' shall only appear
                             * in a declaration of a function parameter with an array type.
                             */
                            if (isStatic || mod)
                                error("static or type qualifier used outside of function prototype");
                        }
                        if (ts.isTypeSArray() || ts.isTypeDArray())
                        {
                            /* C11 6.7.6.2-1: type qualifiers and 'static' shall only appear
                             * in the outermost array type derivation.
                             */
                            if (isStatic || mod)
                                error("static or type qualifier used in non-outermost array type derivation");
                            /* C11 6.7.6.2-1: the element type shall not be an incomplete or
                             * function type.
                             */
                            if (ta.isTypeSArray() && ta.isTypeSArray().isIncomplete() && !isVLA)
                                error("array type has incomplete element type `%s`", ta.toChars());
                        }

                        // Apply type qualifiers to the constructed type.
                        if (mod & MOD.xconst) // ignore the other bits
                            ta = toConst(ta);
                        insertTx(ts, ta, t);  // ts -> ... -> ta -> t
                        continue;
                    }

                    case TOK.leftParenthesis:
                    {
                        // New symbol table for parameter-list
                        auto symbolsSave = this.symbols;
                        this.symbols = null;

                        auto parameterList = cparseParameterList();
                        const lkg = specifier.mod & MOD.x__stdcall ? LINK.windows : linkage;
                        AST.Type tf = new AST.TypeFunction(parameterList, t, lkg, 0);
                        // tf = tf.addSTC(storageClass); // TODO
                        insertTx(ts, tf, t);  // ts -> ... -> tf -> t

                        // keep the parameter symbol table only when the declared type
                        // itself is this function type
                        if (ts != tf)
                            this.symbols = symbolsSave;
                        break;
                    }

                    default:
                        break;
                }
                break;
            }
            return ts;
        }

        t = parseDecl(t);

        /* Because const is transitive, cannot assemble types from
         * fragments. Instead, types to be annotated with const are put
         * in constTypes[], and a bottom up scan of t is done to apply
         * const
         */
        if (constTypes.length)
        {
            AST.Type constApply(AST.Type t)
            {
                if (t.nextOf())
                {
                    auto tn = cast(AST.TypeNext)t; // t.nextOf() should return a ref instead of this
                    tn.next = constApply(tn.next);
                }
                foreach (tc; constTypes[])
                {
                    if (tc is t)
                    {
                        return toConst(t);
                    }
                }
                return t;
            }

            if (declarator == DTR.xparameter &&
                t.isTypePointer())
            {
                /* Because there are instances in .h files of "const pointer to mutable",
                 * skip applying transitive `const`
                 * https://issues.dlang.org/show_bug.cgi?id=22534
                 */
                auto tn = cast(AST.TypeNext)t;
                tn.next = constApply(tn.next);
            }
            else
                t = constApply(t);
        }

        //printf("result: %s\n", t.toChars());
        return t;
    }

    /******************************
     * C11 6.7.3
     * type-qualifier:
     *    const
     *    restrict
     *    volatile
     *    _Atomic
     *    __stdcall
     * Returns:
     *    the MOD bits of all consecutive qualifiers consumed (0 if there were none)
     */
    MOD cparseTypeQualifierList()
    {
        MOD mod;
        while (1)
        {
            switch (token.value)
            {
                case TOK.const_:     mod |= MOD.xconst;     break;
                case TOK.volatile:   mod |= MOD.xvolatile;  break;
                case TOK.restrict:   mod |= MOD.xrestrict;  break;
                case TOK._Atomic:    mod |= MOD.x_Atomic;   break;
                case TOK.__stdcall:  mod |= MOD.x__stdcall; break;

                default:
                    return mod;
            }
            nextToken();
        }
    }

    /***********************************
     * C11 6.7.7
     * Parse a type-name: a specifier-qualifier-list followed by an
     * abstract declarator. A missing type-specifier is reported and
     * replaced with `int`.
     * Returns:
     *    the parsed type
     */
    AST.Type cparseTypeName()
    {
        Specifier specifier;
        specifier.packalign.setDefault();
        auto tspec = cparseSpecifierQualifierList(LVL.global, specifier);
        if (!tspec)
        {
            error("type-specifier is missing");
            tspec = AST.Type.tint32;
        }
        if (tspec && specifier.mod & MOD.xconst)
        {
            tspec = toConst(tspec);
            specifier.mod = MOD.xnone;      // 'used' it
        }
        Identifier id;
        return cparseDeclarator(DTR.xabstract, tspec, id, specifier);
    }

    /***********************************
     * C11 6.7.2.1
     * specifier-qualifier-list:
     *    type-specifier specifier-qualifier-list (opt)
     *    type-qualifier specifier-qualifier-list (opt)
     * Params:
     *  level = declaration context
     *  specifier = specifiers in and out
     * Returns:
     *  resulting type, null if not specified
     */
    AST.Type cparseSpecifierQualifierList(LVL level, ref Specifier specifier)
    {
        auto t = cparseDeclarationSpecifiers(level, specifier);
        if (specifier.scw)
            error("storage class not allowed in specifier-qualified-list");
        return t;
    }

    /***********************************
     * C11 6.7.6.3
     * ( parameter-type-list )
     * ( identifier-list (opt) )
     * Returns:
     *    the parameter list; `()` is returned as variadic with no parameters,
     *    `(void)` as non-variadic with no parameters
     */
    AST.ParameterList cparseParameterList()
    {
        auto parameters = new AST.Parameters();
        AST.VarArg varargs = AST.VarArg.none;
        StorageClass varargsStc;

        check(TOK.leftParenthesis);
        if (token.value == TOK.void_ && peekNext() == TOK.rightParenthesis) // func(void)
        {
            nextToken();
            nextToken();
            return AST.ParameterList(parameters, varargs, varargsStc);
        }

        if (token.value == TOK.rightParenthesis)        // func()
        {
            nextToken();
            importBuiltins = true;                              // will need __va_list_tag
            return AST.ParameterList(parameters, AST.VarArg.variadic, varargsStc);
        }

        /* Create function prototype scope
         */
        typedefTab.push(null);

        // Leave the prototype scope and build the result
        AST.ParameterList finish()
        {
            typedefTab.pop();
            return AST.ParameterList(parameters, varargs, varargsStc);
        }

        /* The check for identifier-list comes later,
         * when doing the trailing declaration-list (opt)
         */
        while (1)
        {
            if (token.value == TOK.rightParenthesis)
                break;
            if (token.value == TOK.endOfFile)
            {
                /* Unterminated parameter list. Report the missing `)` and stop;
                 * without this the loop would keep adding parameters because
                 * no further tokens arrive.
                 */
                check(TOK.rightParenthesis);
                return finish();
            }
            if (token.value == TOK.dotDotDot)
            {
                if (parameters.length == 0)     // func(...)
                    error("named parameter required before `...`");
                importBuiltins = true;          // will need __va_list_tag
                varargs = AST.VarArg.variadic;  // C-style variadics
                nextToken();
                check(TOK.rightParenthesis);
                return finish();
            }

            Specifier specifier;
            specifier.packalign.setDefault();
            auto tspec = cparseDeclarationSpecifiers(LVL.prototype, specifier);
            if (!tspec)
            {
                error("no type-specifier for parameter");
                tspec = AST.Type.tint32;
            }

            if (specifier.mod & MOD.xconst)
            {
                if ((token.value == TOK.rightParenthesis || token.value == TOK.comma) &&
                    tspec.isTypeIdentifier())
                    error("type-specifier omitted for parameter `%s`", tspec.isTypeIdentifier().ident.toChars());

                tspec = toConst(tspec);
                specifier.mod = MOD.xnone;      // 'used' it
            }

            Identifier id;
            auto t = cparseDeclarator(DTR.xparameter, tspec, id, specifier);
            if (token.value == TOK.__attribute__)
                cparseGnuAttributes(specifier);
            if (specifier.mod & MOD.xconst)
                t = toConst(t);
            auto param = new AST.Parameter(specifiersToSTC(LVL.parameter, specifier), t, id, null, null);
            parameters.push(param);
            if (token.value == TOK.rightParenthesis)
                break;
            check(TOK.comma);
        }
        nextToken();
        return finish();
    }

    /***********************************
     * C11 6.7.10
     * _Static_assert ( constant-expression , string-literal ) ;
     * Returns:
     *    StaticAssert node built from the condition and the message
     */
    private AST.StaticAssert cparseStaticAssert()
    {
        const loc = token.loc;

        //printf("cparseStaticAssert()\n");
        nextToken();
        check(TOK.leftParenthesis);
        auto exp = cparseConstantExp();
        check(TOK.comma);
        if (token.value != TOK.string_)
            error("string literal expected");
        auto msg = cparsePrimaryExp();
        check(TOK.rightParenthesis);
        check(TOK.semicolon);
        return new AST.StaticAssert(loc, exp, msg);
    }

    /*************************
     * Collect argument list.
     * Parser is on opening parenthesis.
* Returns: * the arguments */ private AST.Expressions* cparseArguments() { nextToken(); auto arguments = new AST.Expressions(); while (token.value != TOK.rightParenthesis && token.value != TOK.endOfFile) { auto arg = cparseAssignExp(); arguments.push(arg); if (token.value != TOK.comma) break; nextToken(); // consume comma } check(TOK.rightParenthesis); return arguments; } /************************* * __declspec parser * https://docs.microsoft.com/en-us/cpp/cpp/declspec * decl-specifier: * __declspec ( extended-decl-modifier-seq ) * * extended-decl-modifier-seq: * extended-decl-modifier (opt) * extended-decl-modifier extended-decl-modifier-seq * * extended-decl-modifier: * dllimport * dllexport * noreturn * Params: * specifier = filled in with the attribute(s) */ private void cparseDeclspec(ref Specifier specifier) { //printf("cparseDeclspec()\n"); /* Check for dllexport, dllimport * Ignore the rest */ bool dllimport; // TODO implement bool dllexport; // TODO implement nextToken(); // move past __declspec check(TOK.leftParenthesis); while (1) { if (token.value == TOK.rightParenthesis) { nextToken(); break; } else if (token.value == TOK.endOfFile) break; else if (token.value == TOK.identifier) { if (token.ident == Id.dllimport) { dllimport = true; nextToken(); } else if (token.ident == Id.dllexport) { dllexport = true; nextToken(); } else if (token.ident == Id.noreturn) { specifier.noreturn = true; nextToken(); } else { nextToken(); if (token.value == TOK.leftParenthesis) cparseParens(); } } else { error("extended-decl-modifier expected"); break; } } } /************************* * Simple asm parser * https://gcc.gnu.org/onlinedocs/gcc/Asm-Labels.html * simple-asm-expr: * asm ( asm-string-literal ) * * asm-string-literal: * string-literal */ private AST.StringExp cparseSimpleAsmExpr() { nextToken(); // move past asm check(TOK.leftParenthesis); if (token.value != TOK.string_) error("string literal expected"); auto label = cparsePrimaryExp(); 
check(TOK.rightParenthesis); return cast(AST.StringExp) label; } /************************* * __attribute__ parser * https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html * gnu-attributes: * gnu-attributes gnu-attribute-specifier * * gnu-attribute-specifier: * __attribute__ (( gnu-attribute-list )) * * gnu-attribute-list: * gnu-attribute (opt) * gnu-attribute-list , gnu-attribute * * Params: * specifier = filled in with the attribute(s) */ private void cparseGnuAttributes(ref Specifier specifier) { while (token.value == TOK.__attribute__) { nextToken(); // move past __attribute__ check(TOK.leftParenthesis); check(TOK.leftParenthesis); if (token.value != TOK.rightParenthesis) { while (1) { cparseGnuAttribute(specifier); if (token.value != TOK.comma) break; nextToken(); } } check(TOK.rightParenthesis); check(TOK.rightParenthesis); } } /************************* * Parse a single GNU attribute * gnu-attribute: * gnu-attribute-name * gnu-attribute-name ( identifier ) * gnu-attribute-name ( identifier , expression-list ) * gnu-attribute-name ( expression-list (opt) ) * * gnu-attribute-name: * keyword * identifier * * expression-list: * constant-expression * expression-list , constant-expression * * Params: * specifier = filled in with the attribute(s) */ private void cparseGnuAttribute(ref Specifier specifier) { /* Check for dllimport, dllexport, vector_size(bytes) * Ignore the rest */ bool dllimport; // TODO implement bool dllexport; // TODO implement if (!isGnuAttributeName()) return; if (token.value == TOK.identifier) { if (token.ident == Id.dllimport) { dllimport = true; nextToken(); } else if (token.ident == Id.dllexport) { dllexport = true; nextToken(); } else if (token.ident == Id.noreturn) { specifier.noreturn = true; nextToken(); } else if (token.ident == Id.vector_size) { nextToken(); check(TOK.leftParenthesis); cparseConstantExp(); // TODO implement check(TOK.rightParenthesis); } else { nextToken(); if (token.value == TOK.leftParenthesis) cparseParens(); } 
} else { nextToken(); if (token.value == TOK.leftParenthesis) cparseParens(); } } /************************* * See if match for GNU attribute name, which may be any identifier, * storage-class-specifier, type-specifier, or type-qualifier. * Returns: * true if a valid GNU attribute name */ private bool isGnuAttributeName() { switch (token.value) { case TOK.identifier: case TOK.static_: case TOK.unsigned: case TOK.int64: case TOK.const_: case TOK.extern_: case TOK.register: case TOK.typedef_: case TOK.int16: case TOK.inline: case TOK._Noreturn: case TOK.volatile: case TOK.signed: case TOK.auto_: case TOK.restrict: case TOK._Complex: case TOK._Thread_local: case TOK.int32: case TOK.char_: case TOK.float32: case TOK.float64: case TOK.void_: case TOK._Bool: case TOK._Atomic: return true; default: return false; } } /*************************** * Like skipParens(), but consume the tokens. */ private void cparseParens() { check(TOK.leftParenthesis); int parens = 1; while (1) { switch (token.value) { case TOK.leftParenthesis: ++parens; break; case TOK.rightParenthesis: --parens; if (parens < 0) { error("extra right parenthesis"); return; } if (parens == 0) { nextToken(); return; } break; case TOK.endOfFile: error("end of file found before right parenthesis"); return; default: break; } nextToken(); } } //} /******************************************************************************/ /***************************** Struct & Enum Parser ***************************/ //{ /************************************* * C11 6.7.2.2 * enum-specifier: * enum identifier (opt) { enumerator-list } * enum identifier (opt) { enumerator-list , } * enum identifier * * enumerator-list: * enumerator * enumerator-list , enumerator * * enumerator: * enumeration-constant * enumeration-constant = constant-expression * * enumeration-constant: * identifier * * Params: * symbols = symbols to add enum declaration to * Returns: * type of the enum */ private AST.Type cparseEnum(ref AST.Dsymbols* symbols) { 
const loc = token.loc; nextToken(); /* GNU Extensions * enum-specifier: * enum gnu-attributes (opt) identifier (opt) { enumerator-list } gnu-attributes (opt) * enum gnu-attributes (opt) identifier (opt) { enumerator-list , } gnu-attributes (opt) * enum gnu-attributes (opt) identifier */ Specifier specifier; specifier.packalign.setDefault(); if (token.value == TOK.__attribute__) cparseGnuAttributes(specifier); Identifier tag; if (token.value == TOK.identifier) { tag = token.ident; nextToken(); } /* clang extension: add optional base type after the identifier * https://en.cppreference.com/w/cpp/language/enum * enum Identifier : Type */ AST.Type base = AST.Type.tint32; // C11 6.7.2.2-4 implementation defined default base type if (token.value == TOK.colon) { nextToken(); base = cparseTypeName(); } AST.Dsymbols* members; if (token.value == TOK.leftCurly) { nextToken(); members = new AST.Dsymbols(); if (token.value == TOK.rightCurly) // C11 6.7.2.2-1 { if (tag) error("no members for `enum %s`", tag.toChars()); else error("no members for anonymous enum"); } while (token.value == TOK.identifier) { auto ident = token.ident; // enumeration-constant nextToken(); auto mloc = token.loc; if (token.value == TOK.__attribute__) { /* gnu-attributes can appear here, but just scan and ignore them * https://gcc.gnu.org/onlinedocs/gcc/Enumerator-Attributes.html */ Specifier specifierx; specifierx.packalign.setDefault(); cparseGnuAttributes(specifierx); } AST.Expression value; if (token.value == TOK.assign) { nextToken(); value = cparseConstantExp(); // TODO C11 6.7.2.2-2 value must fit into an int } if (token.value == TOK.__attribute__) { /* gnu-attributes can appear here, but just scan and ignore them * https://gcc.gnu.org/onlinedocs/gcc/Enumerator-Attributes.html */ Specifier specifierx; specifierx.packalign.setDefault(); cparseGnuAttributes(specifierx); } auto em = new AST.EnumMember(mloc, ident, value, null, 0, null, null); members.push(em); if (token.value == TOK.comma) { 
nextToken(); continue; } break; } check(TOK.rightCurly); /* GNU Extensions * Parse the postfix gnu-attributes (opt) */ if (token.value == TOK.__attribute__) cparseGnuAttributes(specifier); } else if (!tag) error("missing `identifier` after `enum`"); /* Need semantic information to determine if this is a declaration, * redeclaration, or reference to existing declaration. * Defer to the semantic() pass with a TypeTag. */ return new AST.TypeTag(loc, TOK.enum_, tag, base, members); } /************************************* * C11 6.7.2.1 * Parse struct and union specifiers. * Parser is advanced to the tag identifier or brace. * struct-or-union-specifier: * struct-or-union identifier (opt) { struct-declaration-list } * struct-or-union identifier * * struct-or-union: * struct * union * * struct-declaration-list: * struct-declaration * struct-declaration-list struct-declaration * * Params: * loc = location of `struct` or `union` * structOrUnion = TOK.struct_ or TOK.union_ * symbols = symbols to add struct-or-union declaration to * Returns: * type of the struct */ private AST.Type cparseStruct(Loc loc, TOK structOrUnion, ref AST.Dsymbols* symbols) { Identifier tag; if (token.value == TOK.identifier) { tag = token.ident; nextToken(); } AST.Dsymbols* members; if (token.value == TOK.leftCurly) { nextToken(); members = new AST.Dsymbols(); // so `members` will be non-null even with 0 members while (token.value != TOK.rightCurly) { cparseStructDeclaration(members); if (token.value == TOK.endOfFile) break; } check(TOK.rightCurly); if ((*members).length == 0) // C11 6.7.2.1-8 { /* allow empty structs as an extension * struct-declarator-list: * struct-declarator (opt) */ } } else if (!tag) error("missing tag `identifier` after `%s`", Token.toChars(structOrUnion)); /* Need semantic information to determine if this is a declaration, * redeclaration, or reference to existing declaration. * Defer to the semantic() pass with a TypeTag. 
*/ return new AST.TypeTag(loc, structOrUnion, tag, null, members); } /************************************* * C11 6.7.2.1 * Parse a struct declaration member. * struct-declaration: * specifier-qualifier-list struct-declarator-list (opt) ; * static_assert-declaration * * struct-declarator-list: * struct-declarator * struct-declarator-list , struct-declarator * * struct-declarator: * declarator * declarator (opt) : constant-expression * Params: * members = where to put the fields (members) */ void cparseStructDeclaration(AST.Dsymbols* members) { //printf("cparseStructDeclaration()\n"); if (token.value == TOK._Static_assert) { auto s = cparseStaticAssert(); members.push(s); return; } Specifier specifier; specifier.packalign = this.packalign; auto tspec = cparseSpecifierQualifierList(LVL.member, specifier); if (!tspec) { error("no type-specifier for struct member"); tspec = AST.Type.tint32; } if (specifier.mod & MOD.xconst) { tspec = toConst(tspec); specifier.mod = MOD.xnone; // 'used' it } /* If a declarator does not follow, it is unnamed */ if (token.value == TOK.semicolon && tspec) { nextToken(); auto tt = tspec.isTypeTag(); if (!tt) { if (auto ti = tspec.isTypeIdentifier()) { error("type-specifier omitted before declaration of `%s`", ti.ident.toChars()); } return; // legal but meaningless empty declaration } /* If anonymous struct declaration * struct { ... members ... }; * C11 6.7.2.1-13 */ if (!tt.id && tt.members) { /* members of anonymous struct are considered members of * the containing struct */ auto ad = new AST.AnonDeclaration(tt.loc, tt.tok == TOK.union_, tt.members); auto s = applySpecifier(ad, specifier); members.push(s); return; } if (!tt.id && !tt.members) return; // already gave error in cparseStruct() /* `struct tag;` and `struct tag { ... };` * always result in a declaration in the current scope */ // TODO: merge in specifier auto stag = (tt.tok == TOK.struct_) ? 
new AST.StructDeclaration(tt.loc, tt.id, false) : new AST.UnionDeclaration(tt.loc, tt.id); stag.members = tt.members; if (!symbols) symbols = new AST.Dsymbols(); auto s = applySpecifier(stag, specifier); symbols.push(s); return; } while (1) { Identifier id; AST.Type dt; if (token.value == TOK.colon) { if (auto ti = tspec.isTypeIdentifier()) { error("type-specifier omitted before bit field declaration of `%s`", ti.ident.toChars()); tspec = AST.Type.tint32; } // C11 6.7.2.1-12 unnamed bit-field id = Identifier.generateAnonymousId("BitField"); dt = tspec; } else { dt = cparseDeclarator(DTR.xdirect, tspec, id, specifier); if (!dt) { panic(); nextToken(); break; // error recovery } } AST.Expression width; if (token.value == TOK.colon) { // C11 6.7.2.1-10 bit-field nextToken(); width = cparseConstantExp(); } /* GNU Extensions * struct-declarator: * declarator gnu-attributes (opt) * declarator (opt) : constant-expression gnu-attributes (opt) */ if (token.value == TOK.__attribute__) cparseGnuAttributes(specifier); if (!tspec && !specifier.scw && !specifier.mod) error("specifier-qualifier-list required"); else if (width) { if (specifier.alignExps) error("no alignment-specifier for bit field declaration"); // C11 6.7.5-2 auto s = new AST.BitFieldDeclaration(width.loc, dt, id, width); members.push(s); } else if (id) { if (dt.ty == AST.Tvoid) error("`void` has no value"); // declare the symbol // Give member variables an implicit void initializer auto initializer = new AST.VoidInitializer(token.loc); AST.Dsymbol s = new AST.VarDeclaration(token.loc, dt, id, initializer, specifiersToSTC(LVL.member, specifier)); s = applySpecifier(s, specifier); members.push(s); } switch (token.value) { case TOK.identifier: error("missing comma"); goto default; case TOK.semicolon: nextToken(); return; case TOK.comma: nextToken(); break; default: error("`;` or `,` expected"); while (token.value != TOK.semicolon && token.value != TOK.endOfFile) nextToken(); nextToken(); return; } } } //} 
/******************************************************************************/
/********************************* Lookahead Parser ***************************/
//{

    /************************************
     * Determine if the scanner is sitting on the start of a declaration.
     * Works by lookahead with peek() only; the parser's current token is
     * not advanced by this function.
     * Params:
     *  pt = token to start scanning at; on success it is set to the token
     *       following the terminating `;`, on failure it is left unchanged
     * Returns:
     *  true at start of a declaration
     */
    private bool isCDeclaration(ref Token* pt)
    {
        auto t = pt;
        //printf("isCDeclaration() %s\n", t.toChars());
        if (!isDeclarationSpecifiers(t))
            return false;

        while (1)
        {
            // declaration-specifiers with no declarator, e.g. `struct S { ... };`
            if (t.value == TOK.semicolon)
            {
                t = peek(t);
                pt = t;
                return true;
            }
            if (!isCDeclarator(t, DTR.xdirect))
                return false;

            // optional asm label: asm ( string-literal )
            if (t.value == TOK.asm_)
            {
                t = peek(t);
                if (t.value != TOK.leftParenthesis || !skipParens(t, &t))
                    return false;
            }

            /* cparseGnuAttributes() accepts any number of consecutive
             * __attribute__ specifiers, so the lookahead has to skip all of
             * them, not just the first one.
             */
            while (t.value == TOK.__attribute__)
            {
                t = peek(t);
                if (t.value != TOK.leftParenthesis || !skipParens(t, &t))
                    return false;
            }

            // optional initializer
            if (t.value == TOK.assign)
            {
                t = peek(t);
                if (!isInitializer(t))
                    return false;
            }

            switch (t.value)
            {
                case TOK.comma:         // another declarator follows
                    t = peek(t);
                    break;

                case TOK.semicolon:     // end of the declaration
                    t = peek(t);
                    pt = t;
                    return true;

                default:
                    return false;
            }
        }
    }

    /********************************
     * See if match for initializer.
* Params:
     *  pt = starting token, updated to one past end of initializer if true
     * Returns:
     *  true if initializer
     */
    private bool isInitializer(ref Token* pt)
    {
        //printf("isInitializer()\n");
        auto t = pt;

        if (t.value == TOK.leftCurly)
        {
            if (!skipBraces(t))
                return false;
            pt = t;
            return true;
        }

        // skip over assignment-expression, ending before comma or semiColon or EOF
        if (!isAssignmentExpression(t))
            return false;
        pt = t;
        return true;
    }

    /********************************
     * See if match for a function call statement:
     *    postfix-expression ( argument-expression-list(opt) ) ;
     * Only a primary-expression is accepted in the postfix-expression
     * position, and the call must be directly followed by `;`.
     * Params:
     *  pt = starting token, updated to the `;` that follows the call if true
     * Returns:
     *  true if function call
     */
    private bool isFunctionCall(ref Token* pt)
    {
        //printf("isFunctionCall()\n");
        auto t = pt;

        if (!isPrimaryExpression(t))
            return false;
        if (t.value != TOK.leftParenthesis)
            return false;
        t = peek(t);

        if (t.value == TOK.rightParenthesis)
        {
            /* Empty argument-expression-list, `f()`.
             * isAssignmentExpression() rejects an immediate `)`, so this
             * case has to be recognized before entering the argument loop.
             */
            t = peek(t);
        }
        else
        {
            while (1)
            {
                if (!isAssignmentExpression(t))
                    return false;
                if (t.value == TOK.comma)
                {
                    t = peek(t);
                    continue;
                }
                if (t.value == TOK.rightParenthesis)
                {
                    t = peek(t);
                    break;
                }
                return false;
            }
        }

        if (t.value != TOK.semicolon)
            return false;
        pt = t;
        return true;
    }

    /********************************
     * See if match for assignment-expression.
     * Params:
     *  pt = starting token, updated to one past end of assignment-expression if true
     * Returns:
     *  true if assignment-expression
     */
    private bool isAssignmentExpression(ref Token* pt)
    {
        auto t = pt;
        //printf("isAssignmentExpression() %s\n", t.toChars());

        /* This doesn't actually check for grammar matching an
         * assignment-expression. It just matches ( ) [ ] looking for
         * an ending token that would terminate one.
*/ bool any; while (1) { switch (t.value) { case TOK.comma: case TOK.semicolon: case TOK.rightParenthesis: case TOK.rightBracket: case TOK.endOfFile: if (!any) return false; break; case TOK.leftParenthesis: if (!skipParens(t, &t)) return false; /* https://issues.dlang.org/show_bug.cgi?id=22267 Fix issue 22267: If the parser encounters the following `identifier variableName = (expression);` the initializer is not identified as such since the parentheses cause the parser to keep walking indefinitely (whereas `(1) + 1` would not be affected.). */ any = true; continue; case TOK.leftBracket: if (!skipBrackets(t)) return false; continue; case TOK.leftCurly: if (!skipBraces(t)) return false; continue; default: any = true; // assume token was part of an a-e t = peek(t); continue; } pt = t; return true; } } /******************************** * See if match for constant-expression. * Params: * pt = starting token, updated to one past end of constant-expression if true * Returns: * true if constant-expression */ private bool isConstantExpression(ref Token* pt) { return isAssignmentExpression(pt); } /******************************** * See if match for declaration-specifiers. * No errors are diagnosed. 
* Params: * pt = starting token, updated to one past end of declaration-specifiers if true * Returns: * true if declaration-specifiers */ private bool isDeclarationSpecifiers(ref Token* pt) { //printf("isDeclarationSpecifiers()\n"); auto t = pt; bool seenType; bool any; while (1) { switch (t.value) { // type-specifiers case TOK.void_: case TOK.char_: case TOK.int16: case TOK.int32: case TOK.int64: case TOK.float32: case TOK.float64: case TOK.signed: case TOK.unsigned: case TOK._Bool: //case TOK._Imaginary: case TOK._Complex: t = peek(t); seenType = true; any = true; continue; case TOK.identifier: // typedef-name if (!seenType) { t = peek(t); seenType = true; any = true; continue; } break; case TOK.struct_: case TOK.union_: case TOK.enum_: t = peek(t); if (t.value == TOK.identifier) { t = peek(t); if (t.value == TOK.leftCurly) { if (!skipBraces(t)) return false; } } else if (t.value == TOK.leftCurly) { if (!skipBraces(t)) return false; } else return false; any = true; continue; // storage-class-specifiers case TOK.typedef_: case TOK.extern_: case TOK.static_: case TOK._Thread_local: case TOK.auto_: case TOK.register: // function-specifiers case TOK.inline: case TOK._Noreturn: // type-qualifiers case TOK.const_: case TOK.volatile: case TOK.restrict: case TOK.__stdcall: t = peek(t); any = true; continue; case TOK._Alignas: // alignment-specifier case TOK.__declspec: // decl-specifier case TOK.__attribute__: // attribute-specifier t = peek(t); if (!skipParens(t, &t)) return false; any = true; continue; // either atomic-type-specifier or type_qualifier case TOK._Atomic: // TODO _Atomic ( type-name ) t = peek(t); if (t.value == TOK.leftParenthesis) // maybe atomic-type-specifier { auto tsave = t; t = peek(t); if (!isTypeName(t) || t.value != TOK.rightParenthesis) { // it's a type-qualifier t = tsave; // back up parser any = true; continue; } t = peek(t); // move past right parenthesis of atomic-type-specifier } any = true; continue; default: break; } break; } if (any) { 
pt = t;
            return true;
        }
        return false;
    }

    /**************************************
     * See if declaration-list is present.
     * Params:
     *  pt = starting token; set to the `{` that ends the declaration-list
     *       if true, otherwise left unchanged
     * Returns:
     *  true if declaration-list is present, even an empty one
     */
    bool isDeclarationList(ref Token* pt)
    {
        auto t = pt;
        while (1)
        {
            if (t.value == TOK.leftCurly)
            {
                pt = t;
                return true;
            }
            if (!isCDeclaration(t))
                return false;
        }
    }

    /*******************************************
     * Skip a nested, balanced run of tokens delimited by `open` and `close`.
     * Shared implementation of skipBraces() and skipBrackets().
     * Params:
     *  pt = enters on the `open` token, set to the token past the matching
     *       `close` token on true, left unchanged on false
     *  open = token that increases the nesting depth
     *  close = token that decreases the nesting depth
     * Returns:
     *  true if successful, false if not on `open` or end of file is reached first
     */
    private bool skipNested(ref Token* pt, TOK open, TOK close)
    {
        auto t = pt;
        if (t.value != open)
            return false;

        // The first token examined is the opening one, which raises depth to 1
        int depth = 0;
        for (;; t = peek(t))
        {
            const v = t.value;
            if (v == open)
                ++depth;
            else if (v == close)
            {
                if (--depth == 0)
                {
                    pt = peek(t);
                    return true;
                }
            }
            else if (v == TOK.endOfFile)
                return false;
        }
    }

    /*******************************************
     * Skip braces.
     * Params:
     *      pt = enters on left brace, set to token past right brace on true
     * Returns:
     *      true if successful
     */
    private bool skipBraces(ref Token* pt)
    {
        return skipNested(pt, TOK.leftCurly, TOK.rightCurly);
    }

    /*******************************************
     * Skip brackets.
     * Params:
     *      pt = enters on left bracket, set to token past right bracket on true
     * Returns:
     *      true if successful
     */
    private bool skipBrackets(ref Token* pt)
    {
        return skipNested(pt, TOK.leftBracket, TOK.rightBracket);
    }

    /*********************************
     * Check to see if tokens starting with *pt form a declarator.
* Params:
     *  pt = pointer to starting token, updated to point past declarator if true is returned
     *  declarator = declarator kind
     * Returns:
     *  true if it does
     */
    private bool isCDeclarator(ref Token* pt, DTR declarator)
    {
        auto tk = pt;

        // pointer prefix: any number of `*`, each followed by a type-qualifier-list
        while (tk.value == TOK.mul)
        {
            tk = peek(tk);
            if (!isTypeQualifierList(tk))
                return false;
        }

        switch (tk.value)
        {
            case TOK.identifier:
                // an abstract declarator does not name anything
                if (declarator == DTR.xabstract)
                    return false;
                tk = peek(tk);
                break;

            case TOK.leftParenthesis:
                // ( declarator )
                tk = peek(tk);
                if (!isCDeclarator(tk, declarator) || tk.value != TOK.rightParenthesis)
                    return false;
                tk = peek(tk);
                break;

            default:
                // a direct declarator needs an identifier or parentheses here
                if (declarator == DTR.xdirect)
                    return false;
                break;
        }

        // array and function suffixes
        for (;;)
        {
            if (tk.value == TOK.leftBracket)
            {
                if (!skipBrackets(tk))
                    return false;
            }
            else if (tk.value == TOK.leftParenthesis)
            {
                if (!skipParens(tk, &tk))
                    return false;
            }
            else
                break;
        }

        pt = tk;
        return true;
    }

    /***************************
     * Step over a (possibly empty) type-qualifier-list.
     * Params:
     *  pt = first token; updated to the first token that is not one of
     *       the qualifiers recognized below
     * Returns:
     *  always true, because the list may be empty
     */
    private bool isTypeQualifierList(ref Token* pt)
    {
        auto tk = pt;
        for (;;)
        {
            const v = tk.value;
            const isQualifier = v == TOK.const_    ||
                                v == TOK.restrict  ||
                                v == TOK.volatile  ||
                                v == TOK._Atomic   ||
                                v == TOK.__stdcall;
            if (!isQualifier)
                break;
            tk = peek(tk);
        }
        pt = tk;
        return true;
    }

    /***************************
     * Is this the start of a type-name that is followed by `)`?
     * Params:
     *  pt = first token; updated to the `)` following the type-name if true
     * Returns:
     *  true if start of type-name
     */
    private bool isTypeName(ref Token* pt)
    {
        auto tk = pt;
        //printf("isTypeName() %s\n", tk.toChars());
        const matched = isSpecifierQualifierList(tk) &&
                        isCDeclarator(tk, DTR.xabstract) &&
                        tk.value == TOK.rightParenthesis;
        if (matched)
            pt = tk;
        return matched;
    }

    /***************************
     * Is this the start of a specifier-qualifier-list?
* Params: * pt = first token; updated with past end of specifier-qualifier-list if true * Returns: * true if start of specifier-qualifier-list */ private bool isSpecifierQualifierList(ref Token* pt) { auto t = pt; bool result; while (1) { switch (t.value) { // Type Qualifiers case TOK.const_: case TOK.restrict: case TOK.volatile: case TOK.__stdcall: // Type Specifiers case TOK.char_: case TOK.signed: case TOK.unsigned: case TOK.int16: case TOK.int32: case TOK.int64: case TOK.float32: case TOK.float64: case TOK.void_: case TOK._Bool: //case TOK._Imaginary: // ? missing in Spec case TOK._Complex: t = peek(t); break; case TOK.identifier: // Use typedef table to disambiguate if (isTypedef(t.ident)) { t = peek(t); break; } else { return false; } // struct-or-union-specifier // enum-specifier case TOK.struct_: case TOK.union_: case TOK.enum_: t = peek(t); if (t.value == TOK.identifier) { t = peek(t); if (t.value == TOK.leftCurly) { if (!skipBraces(t)) return false; } } else if (t.value == TOK.leftCurly) { if (!skipBraces(t)) return false; } else return false; break; // atomic-type-specifier case TOK._Atomic: case TOK.typeof_: t = peek(t); if (t.value != TOK.leftParenthesis || !skipParens(t, &t)) return false; break; default: if (result) pt = t; return result; } result = true; } } /************************************ * Looking at the leading left parenthesis, and determine if it is * either of the following: * ( type-name ) cast-expression * ( type-name ) { initializer-list } * as opposed to: * ( expression ) * Params: * pt = starting token, updated to one past end of constant-expression if true * afterParenType = true if already seen `( type-name )` * Returns: * true if matches ( type-name ) ... 
*/ private bool isCastExpression(ref Token* pt, bool afterParenType = false) { enum log = false; if (log) printf("isCastExpression(tk: `%s`, afterParenType: %d)\n", token.toChars(pt.value), afterParenType); auto t = pt; switch (t.value) { case TOK.leftParenthesis: auto tk = peek(t); // move past left parenthesis if (!isTypeName(tk) || tk.value != TOK.rightParenthesis) { if (afterParenType) goto default; // could be ( type-name ) ( unary-expression ) return false; } tk = peek(tk); // move past right parenthesis if (tk.value == TOK.leftCurly) { // ( type-name ) { initializer-list } if (!isInitializer(tk)) { return false; } t = tk; break; } if (tk.value == TOK.leftParenthesis && peek(tk).value == TOK.rightParenthesis) { return false; // (type-name)() is not a cast (it might be a function call) } if (!isCastExpression(tk, true)) { if (afterParenType) // could be ( type-name ) ( unary-expression ) goto default; // where unary-expression also matched type-name return true; } // ( type-name ) cast-expression t = tk; break; default: if (!afterParenType || !isUnaryExpression(t, afterParenType)) { return false; } // if we've already seen ( type-name ), then this is a cast break; } pt = t; if (log) printf("isCastExpression true\n"); return true; } /******************************** * See if match for unary-expression. 
* Params:
     *  pt = starting token, updated to one past end of unary-expression if true
     *  afterParenType = true if already seen ( type-name ) of a cast-expression
     * Returns:
     *  true if unary-expression
     */
    private bool isUnaryExpression(ref Token* pt, bool afterParenType = false)
    {
        auto t = pt;
        switch (t.value)
        {
            case TOK.plusPlus:
            case TOK.minusMinus:
                // ++ unary-expression, -- unary-expression
                t = peek(t);
                if (!isUnaryExpression(t, afterParenType))
                    return false;
                break;

            case TOK.and:
            case TOK.mul:
            case TOK.min:
            case TOK.add:
            case TOK.not:
            case TOK.tilde:
                // unary-operator cast-expression
                t = peek(t);
                if (!isCastExpression(t, afterParenType))
                    return false;
                break;

            case TOK.sizeof_:
                t = peek(t);
                if (t.value == TOK.leftParenthesis)
                {
                    // sizeof ( type-name )
                    auto tk = peek(t);
                    if (isTypeName(tk))
                    {
                        if (tk.value != TOK.rightParenthesis)
                            return false;
                        t = peek(tk);   // move past right parenthesis
                        break;
                    }
                }
                // sizeof unary-expression
                if (!isUnaryExpression(t, afterParenType))
                    return false;
                break;

            case TOK._Alignof:
                // _Alignof ( type-name )
                t = peek(t);
                if (t.value != TOK.leftParenthesis)
                    return false;
                t = peek(t);
                if (!isTypeName(t) || t.value != TOK.rightParenthesis)
                    return false;
                /* Move past the right parenthesis so that `pt` ends up one
                 * past the expression, the same as the sizeof ( type-name )
                 * case above.
                 */
                t = peek(t);
                break;

            default:
                // Compound literals are handled by cast and sizeof expressions,
                // so be content with just seeing a primary expression.
                if (!isPrimaryExpression(t))
                    return false;
                break;
        }
        pt = t;
        return true;
    }

    /********************************
     * See if match for primary-expression.
* Params: * pt = starting token, updated to one past end of constant-expression if true * Returns: * true if primary-expression */ private bool isPrimaryExpression(ref Token* pt) { auto t = pt; switch (t.value) { case TOK.identifier: case TOK.charLiteral: case TOK.int32Literal: case TOK.uns32Literal: case TOK.int64Literal: case TOK.uns64Literal: case TOK.float32Literal: case TOK.float64Literal: case TOK.float80Literal: case TOK.imaginary32Literal: case TOK.imaginary64Literal: case TOK.imaginary80Literal: case TOK.string_: t = peek(t); break; case TOK.leftParenthesis: // ( expression ) if (!skipParens(t, &t)) return false; break; case TOK._Generic: t = peek(t); if (!skipParens(t, &t)) return false; break; default: return false; } pt = t; return true; } //} /******************************************************************************/ /********************************* More ***************************************/ //{ /************** * Declaration context */ enum LVL { global = 1, /// global parameter = 2, /// function parameter (declarations for function identifier-list) prototype = 4, /// function prototype local = 8, /// local member = 0x10, /// struct member } /// Types of declarator to parse enum DTR { xdirect = 1, /// C11 6.7.6 direct-declarator xabstract = 2, /// C11 6.7.7 abstract-declarator xparameter = 3, /// parameter declarator may be either direct or abstract } /// C11 6.7.1 Storage-class specifiers enum SCW : uint { xnone = 0, xtypedef = 1, xextern = 2, xstatic = 4, x_Thread_local = 8, xauto = 0x10, xregister = 0x20, // C11 6.7.4 Function specifiers xinline = 0x40, x_Noreturn = 0x80, } /// C11 6.7.3 Type qualifiers enum MOD : uint { xnone = 0, xconst = 1, xvolatile = 2, xrestrict = 4, x_Atomic = 8, x__stdcall = 0x10, // Windows linkage extension } /********************************** * Aggregate for all the various specifiers */ struct Specifier { bool noreturn; /// noreturn attribute SCW scw; /// storage-class specifiers MOD mod; /// type qualifiers 
AST.Expressions* alignExps;      /// alignment
        structalign_t packalign;  /// #pragma pack alignment value
    }

    /***********************
     * Convert from C specifiers to D storage class.
     * When `_Thread_local` is present no `gshared` is added; only `extern`
     * or `static` is carried over. Otherwise the result depends on the
     * declaration context as spelled out in the branches below.
     * Params:
     *  level = declaration context
     *  specifier = specifiers, context, etc.
     * Returns:
     *  corresponding D storage class
     */
    StorageClass specifiersToSTC(LVL level, const ref Specifier specifier)
    {
        StorageClass stc;
        if (specifier.scw & SCW.x_Thread_local)
        {
            if (level == LVL.global)
            {
                if (specifier.scw & SCW.xextern)
                    stc = AST.STC.extern_;
                else if (specifier.scw & SCW.xstatic)
                    // keep `static` for a file scope `static _Thread_local`,
                    // consistent with the local and member levels below
                    stc = AST.STC.static_;
            }
            else if (level == LVL.local)
            {
                if (specifier.scw & SCW.xextern)
                    stc = AST.STC.extern_;
                else if (specifier.scw & SCW.xstatic)
                    stc = AST.STC.static_;
            }
            else if (level == LVL.member)
            {
                if (specifier.scw & SCW.xextern)
                    stc = AST.STC.extern_;
                else if (specifier.scw & SCW.xstatic)
                    stc = AST.STC.static_;
            }
        }
        else
        {
            if (level == LVL.global)
            {
                if (specifier.scw & SCW.xextern)
                    stc = AST.STC.extern_ | AST.STC.gshared;
                else if (specifier.scw & SCW.xstatic)
                    stc = AST.STC.gshared | AST.STC.static_;
                else
                    stc = AST.STC.gshared;
            }
            else if (level == LVL.local)
            {
                if (specifier.scw & SCW.xextern)
                    stc = AST.STC.extern_ | AST.STC.gshared;
                else if (specifier.scw & SCW.xstatic)
                    stc = AST.STC.gshared;
                else if (specifier.scw & SCW.xregister)
                    stc = AST.STC.register;
            }
            else if (level == LVL.parameter)
            {
                if (specifier.scw & SCW.xregister)
                    stc = AST.STC.register | AST.STC.parameter;
                else
                    stc = AST.STC.parameter;
            }
            else if (level == LVL.member)
            {
                if (specifier.scw & SCW.xextern)
                    stc = AST.STC.extern_ | AST.STC.gshared;
                else if (specifier.scw & SCW.xstatic)
                    stc = AST.STC.gshared;
            }
        }
        return stc;
    }

    /***********************
     * Return suitable signed integer type for the given size
     * Params:
     *  size = size of type
     * Returns:
     *  corresponding signed D integer type
     */
    private AST.Type integerTypeForSize(ubyte size)
    {
        if (size <= 1)
            return AST.Type.tint8;
        if (size <= 2)
            return AST.Type.tint16;
        if (size <= 4)
            return AST.Type.tint32;
        if (size <= 8)
            return AST.Type.tint64;
        error("unsupported integer
type"); return AST.Type.terror; } /*********************** * Return suitable unsigned integer type for the given size * Params: * size = size of type * Returns: * corresponding unsigned D integer type */ private AST.Type unsignedTypeForSize(ubyte size) { if (size <= 1) return AST.Type.tuns8; if (size <= 2) return AST.Type.tuns16; if (size <= 4) return AST.Type.tuns32; if (size <= 8) return AST.Type.tuns64; error("unsupported integer type"); return AST.Type.terror; } /*********************** * Return suitable D float type for C `long double` * Params: * flags = kind of float to return (real, imaginary, complex). * Returns: * corresponding D type */ private AST.Type realType(RTFlags flags) { if (long_doublesize == AST.Type.tfloat80.size()) { // On GDC and LDC, D `real` types map to C `long double`, so never // return a double type when real.sizeof == double.sizeof. final switch (flags) { case RTFlags.realfloat: return AST.Type.tfloat80; case RTFlags.imaginary: return AST.Type.timaginary80; case RTFlags.complex: return AST.Type.tcomplex80; } } else { final switch (flags) { case RTFlags.realfloat: return long_doublesize == 8 ? AST.Type.tfloat64 : AST.Type.tfloat80; case RTFlags.imaginary: return long_doublesize == 8 ? AST.Type.timaginary64 : AST.Type.timaginary80; case RTFlags.complex: return long_doublesize == 8 ? 
AST.Type.tcomplex64 : AST.Type.tcomplex80; } } } /************** * Flags for realType */ private enum RTFlags { realfloat, imaginary, complex, } /******************** * C11 6.4.2.2 Create declaration to predefine __func__ * `static const char __func__[] = " function-name ";` * Params: * loc = location for this declaration * id = identifier of function * Returns: * statement representing the declaration of __func__ */ private AST.Statement createFuncName(Loc loc, Identifier id) { const fn = id.toString(); // function-name auto efn = new AST.StringExp(loc, fn, fn.length, 1, 'c'); auto ifn = new AST.ExpInitializer(loc, efn); auto lenfn = new AST.IntegerExp(loc, fn.length + 1, AST.Type.tuns32); // +1 for terminating 0 auto tfn = new AST.TypeSArray(AST.Type.tchar, lenfn); efn.type = tfn.immutableOf(); efn.committed = 1; auto sfn = new AST.VarDeclaration(loc, tfn, Id.__func__, ifn, STC.gshared | STC.immutable_); auto e = new AST.DeclarationExp(loc, sfn); return new AST.ExpStatement(loc, e); } /************************ * After encountering an error, scan forward until a right brace or ; is found * or the end of the file. */ void panic() { while (token.value != TOK.rightCurly && token.value != TOK.semicolon && token.value != TOK.endOfFile) nextToken(); } /************************** * Apply `const` to a type. * Params: * t = type to add const to * Returns: * resulting type */ private AST.Type toConst(AST.Type t) { // `const` is always applied to the return type, not the // type function itself. if (auto tf = t.isTypeFunction()) tf.next = tf.next.addSTC(STC.const_); else if (auto tt = t.isTypeTag()) tt.mod |= MODFlags.const_; else t = t.addSTC(STC.const_); return t; } /*************************** * Apply specifier to a Dsymbol. 
* Params:
     *  s = Dsymbol to which the alignment information is applied
     *  specifier = specifiers to apply
     * Returns:
     *  `s` itself when there is nothing to apply, otherwise an
     *  AlignDeclaration whose only member is `s`
     */
    private AST.Dsymbol applySpecifier(AST.Dsymbol s, ref Specifier specifier)
    {
        //printf("applySpecifier() %s\n", s.toChars());
        const bool hasAlignas = specifier.alignExps !is null;
        if (!hasAlignas && specifier.packalign.isDefault())
            return s;           // neither _Alignas nor #pragma pack is in effect

        // Both cases wrap the declaration in an AlignDeclaration
        auto members = new AST.Dsymbols(1);
        (*members)[0] = s;

        if (hasAlignas)
        {
            //printf("  applying _Alignas %s, packalign %d\n", (*specifier.alignExps)[0].toChars(), cast(int)specifier.packalign);
            return new AST.AlignDeclaration(s.loc, specifier.alignExps, members);
        }

        //printf("  applying packalign %d\n", cast(int)specifier.packalign);
        return new AST.AlignDeclaration(s.loc, specifier.packalign, members);
    }

    //}

    /******************************************************************************/
    /************************** typedefTab symbol table ***************************/
    //{

    /********************************
     * Determines if type t is a function type, either directly or
     * through a typedef name found in typedefTab[].
     * Params:
     *  t = type to test
     * Returns:
     *  true if it represents a function
     */
    bool isFunctionTypedef(AST.Type t)
    {
        //printf("isFunctionTypedef() %s\n", t.toChars());
        if (t.isTypeFunction())
            return true;

        auto tid = t.isTypeIdentifier();
        if (!tid)
            return false;

        // a typedef name: check the type it was declared with
        auto pt = lookupTypedef(tid.ident);
        return pt && *pt && (*pt).isTypeFunction() !is null;
    }

    /********************************
     * Determine if `id` is a symbol for a Typedef.
     * Params:
     *  id = possible typedef
     * Returns:
     *  true if id is a Type
     */
    bool isTypedef(Identifier id)
    {
        // an entry holding null is a non-typedef symbol masking a typedef
        if (auto pt = lookupTypedef(id))
            return *pt !is null;
        return false;
    }

    /*******************************
     * Add `id` to typedefTab[], but only if it will mask an existing typedef.
* Params:
     *  id = identifier for non-typedef symbol
     */
    void insertIdToTypedefTab(Identifier id)
    {
        //printf("insertIdToTypedefTab(id: %s) level %d\n", id.toChars(), cast(int)typedefTab.length - 1);
        if (isTypedef(id)) // if existing typedef
        {
            /* Add id as null, so we can later distinguish it from a non-null typedef
             */
            auto tab = cast(void*[void*])(typedefTab[$ - 1]);
            tab[cast(void*)id] = cast(void*)null;
            /* Store the table back, as insertTypedefToTypedefTab() does. If the
             * innermost slot held a null (empty) associative array, the insertion
             * above allocated a new one that only the local `tab` refers to, and
             * the masking entry would otherwise be lost.
             */
            typedefTab[$ - 1] = cast(void*)tab;
        }
    }

    /*******************************
     * Add `id` to typedefTab[]
     * If `t` is itself a typedef name that is found in typedefTab[], the type
     * it resolves to is stored instead.
     * Params:
     *  id = identifier for typedef symbol
     *  t = type of the typedef symbol
     */
    void insertTypedefToTypedefTab(Identifier id, AST.Type t)
    {
        //printf("insertTypedefToTypedefTab(id: %s, t: %s) level %d\n", id.toChars(), t ? t.toChars() : "null".ptr, cast(int)typedefTab.length - 1);
        if (auto tid = t.isTypeIdentifier())
        {
            // Try to resolve the TypeIdentifier to its type
            auto pt = lookupTypedef(tid.ident);
            if (pt && *pt)
                t = *pt;
        }
        auto tab = cast(void*[void*])(typedefTab[$ - 1]);
        tab[cast(void*)id] = cast(void*)t;
        typedefTab[$ - 1] = cast(void*)tab;     // the insertion may have allocated the table
    }

    /*********************************
     * Lookup id in typedefTab[], searching from the last table to the first.
     * Returns:
     *  if not found, then null.
     *  if found, then Type*. Dereferencing it will yield null if it is not
     *  a typedef, and a type if it is a typedef.
     */
    AST.Type* lookupTypedef(Identifier id)
    {
        foreach_reverse (tab; typedefTab[])
        {
            if (auto pt = cast(void*)id in cast(void*[void*])tab)
            {
                return cast(AST.Type*)pt;
            }
        }
        return null; // not found
    }

    //}

    /******************************************************************************/
    /********************************* Directive Parser ***************************/
    //{

    /********************************
     * Scan and process the directive that follows a `#`.
     * An integer literal or the identifier `line` is forwarded to poundLine().
     * `define` and `undef` lines are appended to `defines` when it is non-null.
     * The identifier `Id.__pragma` is forwarded to pragmaDirective().
     * Returns:
     *  true if the directive was handled; false otherwise, after reporting
     *  an error for anything but `undef`
     */
    override bool parseSpecialTokenSequence()
    {
        Token n;
        scan(&n);
        if (n.value == TOK.int32Literal)
        {
            poundLine(n, true);
            return true;
        }
        if (n.value == TOK.identifier)
        {
            if (n.ident == Id.line)
            {
                poundLine(n, false);
                return true;
            }
            else if (defines && (n.ident == Id.define || n.ident == Id.undef))
            {
                /* Append this line to `defines`.
                 * Not canonicalizing it - assume it already is
                 */
                defines.writeByte('#');
                defines.writestring(n.ident.toString());
                skipToNextLine(defines);
                defines.writeByte('\n');
                return true;
            }
            else if (n.ident == Id.__pragma)
            {
                pragmaDirective(scanloc);
                return true;
            }
        }
        /* `ident` is only examined for identifier tokens, so that a value
         * belonging to another kind of token is never interpreted as an Identifier.
         */
        if (n.value != TOK.identifier || n.ident != Id.undef)
            error("C preprocessor directive `#%s` is not supported", n.toChars());
        return false;
    }

    /*********************************************
     * VC __pragma
     * https://docs.microsoft.com/en-us/cpp/preprocessor/pragma-directives-and-the-pragma-keyword?view=msvc-170
     * Scanner is on the `__pragma`
     * Only `__pragma(pack ...)` is recognized; anything else is reported as an error.
     * Params:
     *  startloc = location to use for error messages
     */
    private void uupragmaDirective(const ref Loc startloc)
    {
        const loc = startloc;
        nextToken();
        if (token.value != TOK.leftParenthesis)
        {
            error(loc, "left parenthesis expected to follow `__pragma`");
            return;
        }
        nextToken();
        if (token.value == TOK.identifier && token.ident == Id.pack)
            pragmaPack(startloc, false);
        else
            error(loc, "unrecognized __pragma");
        if (token.value != TOK.rightParenthesis)
        {
            error(loc, "right parenthesis expected to close `__pragma(...)`");
            return;
        }
        nextToken();
    }

    /*********************************************
     * C11 6.10.6 Pragma directive
     * # pragma pp-tokens(opt) new-line
     * The C preprocessor sometimes leaves pragma directives in
     * the preprocessed output. Ignore them.
     * Upon return, p is at start of next line.
* `#pragma pack` is the exception: it is handed to pragmaPack().
     * Params:
     *  loc = location passed on to pragmaPack()
     */
    private void pragmaDirective(const ref Loc loc)
    {
        Token n;
        scan(&n);
        if (n.value == TOK.identifier && n.ident == Id.pack)
            return pragmaPack(loc, true);
        if (n.value != TOK.endOfLine)
            skipToNextLine();
    }

    /*********
     * # pragma pack
     * https://gcc.gnu.org/onlinedocs/gcc-4.4.4/gcc/Structure_002dPacking-Pragmas.html
     * https://docs.microsoft.com/en-us/cpp/preprocessor/pack
     * Scanner is on the `pack`
     *
     * The pack stack is kept in the parallel arrays `records` (optional
     * identifiers) and `packs` (alignment values). A push stores the alignment
     * in effect after the push; a pop removes entries and restores the alignment
     * of the new top of the stack, or the default when the stack becomes empty.
     * Params:
     *  startloc = location to use for error messages
     *  useScan = use scan() to retrieve next token, instead of nextToken()
     */
    private void pragmaPack(const ref Loc startloc, bool useScan)
    {
        const loc = startloc;

        /* Pull tokens from scan() or nextToken()
         */
        void scan(Token* t)
        {
            if (useScan)
            {
                Lexer.scan(t);
            }
            else
            {
                nextToken();
                *t = token;
            }
        }

        Token n;
        scan(&n);
        if (n.value != TOK.leftParenthesis)
        {
            error(loc, "left parenthesis expected to follow `#pragma pack`");
            if (n.value != TOK.endOfLine)
                skipToNextLine();
            return;
        }

        /* Check that the current token closes the pragma, then skip the
         * remainder of the line.
         */
        void closingParen()
        {
            if (n.value != TOK.rightParenthesis)
            {
                error(loc, "right parenthesis expected to close `#pragma pack(`");
            }
            if (n.value != TOK.endOfLine)
                skipToNextLine();
        }

        /* Make the integer value of `t` the current pack alignment,
         * reporting an error if it is not a power of 2 in 1 .. ushort.max
         */
        void setPackAlign(ref const Token t)
        {
            const n = t.unsvalue;
            if (n < 1 || n & (n - 1) || ushort.max < n)
                error(loc, "pack must be an integer positive power of 2, not 0x%llx", cast(ulong)n);
            packalign.set(cast(uint)n);
            packalign.setPack(true);
        }

        scan(&n);

        // create the pack stack on first use
        if (!records)
        {
            records = new Array!Identifier;
            packs = new Array!structalign_t;
        }

        /* # pragma pack ( show )
         */
        if (n.value == TOK.identifier && n.ident == Id.show)
        {
            if (packalign.isDefault())
                warning(startloc, "current pack attribute is default");
            else
                warning(startloc, "current pack attribute is %d", packalign.get());
            scan(&n);
            return closingParen();
        }

        /* # pragma pack ( push )
         * # pragma pack ( push , identifier )
         * # pragma pack ( push , integer )
         * # pragma pack ( push , identifier , integer )
         */
        if (n.value == TOK.identifier && n.ident == Id.push)
        {
            scan(&n);
            Identifier record = null;
            if (n.value == TOK.comma)
            {
                scan(&n);
                if (n.value == TOK.identifier)
                {
                    record = n.ident;
                    scan(&n);
                    if (n.value == TOK.comma)
                    {
                        scan(&n);
                        if (n.value == TOK.int32Literal)
                        {
                            setPackAlign(n);
                            scan(&n);
                        }
                        else
                            error(loc, "alignment value expected, not `%s`", n.toChars());
                    }
                }
                else if (n.value == TOK.int32Literal)
                {
                    setPackAlign(n);
                    scan(&n);
                }
                else
                    error(loc, "alignment value expected, not `%s`", n.toChars());
            }
            // the pushed value is the alignment in effect after the push
            this.records.push(record);
            this.packs.push(packalign);
            return closingParen();
        }

        /* # pragma pack ( pop )
         * # pragma pack ( pop PopList )
         * PopList :
         *    , IdentifierOrInteger
         *    , IdentifierOrInteger PopList
         * IdentifierOrInteger:
         *      identifier
         *      integer
         */
        if (n.value == TOK.identifier && n.ident == Id.pop)
        {
            scan(&n);
            if (n.value == TOK.rightParenthesis) // #pragma pack ( pop )
            {
                const size_t len = this.records.length;
                if (len == 0)      // nothing to pop
                    return closingParen();

                this.records.setDim(len - 1);
                this.packs.setDim(len - 1);
                if (len == 1)      // stack is now empty
                    packalign.setDefault();
                else
                    packalign = (*this.packs)[len - 2]; // new top; [len - 1] is the entry just popped
                return closingParen();
            }
            while (n.value == TOK.comma)  // #pragma pack ( pop ,
            {
                scan(&n);
                if (n.value == TOK.identifier)
                {
                    /* pragma pack(pop, identifier
                     * Pop until identifier is found, pop that one too, and set
                     * alignment to the new top of the stack.
                     * If identifier is not found, do nothing.
                     * The search starts from the current stack length each time,
                     * so an earlier item of the PopList cannot leave a stale index.
                     */
                    for (size_t len = this.records.length; len; --len)
                    {
                        if ((*this.records)[len - 1] == n.ident)
                        {
                            this.records.setDim(len - 1);
                            this.packs.setDim(len - 1);
                            if (len > 1)
                                packalign = (*this.packs)[len - 2];
                            else
                                packalign.setDefault(); // stack empty, use default
                            break;
                        }
                    }
                    scan(&n);
                }
                else if (n.value == TOK.int32Literal)
                {
                    setPackAlign(n);
                    scan(&n);
                }
                else
                {
                    error(loc, "identifier or alignment value expected following `#pragma pack(pop,` not `%s`", n.toChars());
                    scan(&n);
                }
            }
            return closingParen();
        }

        /* # pragma pack ( integer )
         * Sets alignment to integer
         */
        if (n.value == TOK.int32Literal)
        {
            setPackAlign(n);
            scan(&n);
            return closingParen();
        }

        /* # pragma pack ( )
         * Sets alignment to default
         */
        if (n.value == TOK.rightParenthesis)
        {
            packalign.setDefault();
            return closingParen();
        }

        error(loc, "unrecognized `#pragma pack(%s)`", n.toChars());
        if (n.value != TOK.endOfLine)
            skipToNextLine();
    }

    //}

    /******************************************************************************/
    /********************************* #define Parser *****************************/
    //{

    /**
     * Go through the #define's in the defines buffer and see what we can convert
     * to Dsymbols, which are then appended to symbols[]
     *
     * Only lines of the form `#define identifier literal` are converted, where
     * literal is a single integer, character, floating point, imaginary or string
     * literal followed by the end of the line. Each becomes a manifest constant.
     * A later #define of the same identifier replaces the earlier symbol.
     */
    void addDefines()
    {
        if (!defines || defines.length < 10)  // minimum length of a #define line
            return;
        OutBuffer* buf = defines;
        defines = null;                 // prevent skipToNextLine() and parseSpecialTokenSequence()
                                        // from appending to slice[]
        const length = buf.length;
        buf.writeByte(0);               // terminating 0, not included in slice[]
        auto slice = buf.peekChars()[0 .. length];
        resetDefineLines(slice);                // reset lexer

        // stop while the 7 character comparison with "#define" below still fits in slice[]
        const(char)* endp = &slice[length - 7];

        size_t[void*] defineTab;    // hash table of #define's turned into Symbol's
                                    // indexed by Identifier, returns index into symbols[]
                                    // The memory for this is leaked

        void addVar(AST.VarDeclaration v)
        {
            /* If it's already defined, replace the earlier
             * definition
             */
            if (size_t* pd = cast(void*)v.ident in defineTab)
            {
                //printf("replacing %s\n", v.toChars());
                (*symbols)[*pd] = v;
                return;
            }
            defineTab[cast(void*)v.ident] = symbols.length;
            symbols.push(v);
        }

        Token n;

        while (p < endp)
        {
            if (p[0 .. 7] == "#define")
            {
                p += 7;
                scan(&n);
                //printf("%s\n", n.toChars());
                if (n.value == TOK.identifier)
                {
                    auto id = n.ident;
                    scan(&n);

                    AST.Type t;

                    switch (n.value)
                    {
                        case TOK.endOfLine:     // #define identifier
                            nextDefineLine();
                            continue;

                        case TOK.int32Literal:
                        case TOK.charLiteral:       t = AST.Type.tint32;    goto Linteger;
                        case TOK.uns32Literal:      t = AST.Type.tuns32;    goto Linteger;
                        case TOK.int64Literal:      t = AST.Type.tint64;    goto Linteger;
                        case TOK.uns64Literal:      t = AST.Type.tuns64;    goto Linteger;

                        Linteger:
                            const intvalue = n.intvalue;
                            scan(&n);
                            if (n.value == TOK.endOfLine)
                            {
                                /* Declare manifest constant:
                                 *  enum id = intvalue;
                                 */
                                AST.Expression e = new AST.IntegerExp(scanloc, intvalue, t);
                                auto v = new AST.VarDeclaration(scanloc, t, id, new AST.ExpInitializer(scanloc, e), STC.manifest);
                                addVar(v);
                                nextDefineLine();
                                continue;
                            }
                            break;      // more tokens follow the literal: not converted

                        case TOK.float32Literal:      t = AST.Type.tfloat32;     goto Lfloat;
                        case TOK.float64Literal:      t = AST.Type.tfloat64;     goto Lfloat;
                        case TOK.float80Literal:      t = AST.Type.tfloat80;     goto Lfloat;
                        case TOK.imaginary32Literal:  t = AST.Type.timaginary32; goto Lfloat;
                        case TOK.imaginary64Literal:  t = AST.Type.timaginary64; goto Lfloat;
                        case TOK.imaginary80Literal:  t = AST.Type.timaginary80; goto Lfloat;

                        Lfloat:
                            const floatvalue = n.floatvalue;
                            scan(&n);
                            if (n.value == TOK.endOfLine)
                            {
                                /* Declare manifest constant:
                                 *  enum id = floatvalue;
                                 */
                                AST.Expression e = new AST.RealExp(scanloc, floatvalue, t);
                                auto v = new AST.VarDeclaration(scanloc, t, id, new AST.ExpInitializer(scanloc, e), STC.manifest);
                                addVar(v);
                                nextDefineLine();
                                continue;
                            }
                            break;      // more tokens follow the literal: not converted

                        case TOK.string_:
                            const str = n.ustring;
                            const len = n.len;
                            const postfix = n.postfix;
                            scan(&n);
                            if (n.value == TOK.endOfLine)
                            {
                                /* Declare manifest constant:
                                 *  enum id = "string";
                                 */
                                AST.Expression e = new AST.StringExp(scanloc, str[0 .. len], len, 1, postfix);
                                auto v = new AST.VarDeclaration(scanloc, null, id, new AST.ExpInitializer(scanloc, e), STC.manifest);
                                addVar(v);
                                nextDefineLine();
                                continue;
                            }
                            break;      // more tokens follow the literal: not converted

                        default:
                            break;
                    }
                }
                skipToNextLine();
            }
            else
            {
                // not a #define line: skip it
                scan(&n);
                if (n.value != TOK.endOfLine)
                {
                    skipToNextLine();
                }
            }
            nextDefineLine();
        }

        defines = buf;      // restore the buffer set aside at the top
    }

    //}
}