//===- Token.cpp - MLIR Token Implementation ------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the Token class for the MLIR textual form. // //===----------------------------------------------------------------------===// #include "Token.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ErrorHandling.h" #include #include #include #include using namespace mlir; SMLoc Token::getLoc() const { return SMLoc::getFromPointer(spelling.data()); } SMLoc Token::getEndLoc() const { return SMLoc::getFromPointer(spelling.data() + spelling.size()); } SMRange Token::getLocRange() const { return SMRange(getLoc(), getEndLoc()); } /// For an integer token, return its value as an unsigned. If it doesn't fit, /// return std::nullopt. std::optional Token::getUnsignedIntegerValue() const { bool isHex = spelling.size() > 1 && spelling[1] == 'x'; unsigned result = 0; if (spelling.getAsInteger(isHex ? 0 : 10, result)) return std::nullopt; return result; } /// For an integer token, return its value as a uint64_t. If it doesn't fit, /// return std::nullopt. std::optional Token::getUInt64IntegerValue(StringRef spelling) { bool isHex = spelling.size() > 1 && spelling[1] == 'x'; uint64_t result = 0; if (spelling.getAsInteger(isHex ? 0 : 10, result)) return std::nullopt; return result; } /// For a floatliteral, return its value as a double. Return std::nullopt if the /// value underflows or overflows. std::optional Token::getFloatingPointValue() const { double result = 0; if (spelling.getAsDouble(result)) return std::nullopt; return result; } /// For an inttype token, return its bitwidth. std::optional Token::getIntTypeBitwidth() const { assert(getKind() == inttype); unsigned bitwidthStart = (spelling[0] == 'i' ? 1 : 2); unsigned result = 0; if (spelling.drop_front(bitwidthStart).getAsInteger(10, result)) return std::nullopt; return result; } std::optional Token::getIntTypeSignedness() const { assert(getKind() == inttype); if (spelling[0] == 'i') return std::nullopt; if (spelling[0] == 's') return true; assert(spelling[0] == 'u'); return false; } /// Given a token containing a string literal, return its value, including /// removing the quote characters and unescaping the contents of the string. The /// lexer has already verified that this token is valid. std::string Token::getStringValue() const { assert(getKind() == string || getKind() == code_complete || (getKind() == at_identifier && getSpelling()[1] == '"')); // Start by dropping the quotes. StringRef bytes = getSpelling().drop_front(); if (getKind() != Token::code_complete) { bytes = bytes.drop_back(); if (getKind() == at_identifier) bytes = bytes.drop_front(); } std::string result; result.reserve(bytes.size()); for (unsigned i = 0, e = bytes.size(); i != e;) { auto c = bytes[i++]; if (c != '\\') { result.push_back(c); continue; } assert(i + 1 <= e && "invalid string should be caught by lexer"); auto c1 = bytes[i++]; switch (c1) { case '"': case '\\': result.push_back(c1); continue; case 'n': result.push_back('\n'); continue; case 't': result.push_back('\t'); continue; default: break; } assert(i + 1 <= e && "invalid string should be caught by lexer"); auto c2 = bytes[i++]; assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape"); result.push_back((llvm::hexDigitValue(c1) << 4) | llvm::hexDigitValue(c2)); } return result; } /// Given a token containing a hex string literal, return its value or /// std::nullopt if the token does not contain a valid hex string. std::optional Token::getHexStringValue() const { assert(getKind() == string); // Get the internal string data, without the quotes. StringRef bytes = getSpelling().drop_front().drop_back(); // Try to extract the binary data from the hex string. We expect the hex // string to start with `0x` and have an even number of hex nibbles (nibbles // should come in pairs). std::string hex; if (!bytes.consume_front("0x") || (bytes.size() & 1) || !llvm::tryGetFromHex(bytes, hex)) return std::nullopt; return hex; } /// Given a token containing a symbol reference, return the unescaped string /// value. std::string Token::getSymbolReference() const { assert(is(Token::at_identifier) && "expected valid @-identifier"); StringRef nameStr = getSpelling().drop_front(); // Check to see if the reference is a string literal, or a bare identifier. if (nameStr.front() == '"') return getStringValue(); return std::string(nameStr); } /// Given a hash_identifier token like #123, try to parse the number out of /// the identifier, returning std::nullopt if it is a named identifier like #x /// or if the integer doesn't fit. std::optional Token::getHashIdentifierNumber() const { assert(getKind() == hash_identifier); unsigned result = 0; if (spelling.drop_front().getAsInteger(10, result)) return std::nullopt; return result; } /// Given a punctuation or keyword token kind, return the spelling of the /// token as a string. Warning: This will abort on markers, identifiers and /// literal tokens since they have no fixed spelling. StringRef Token::getTokenSpelling(Kind kind) { switch (kind) { default: llvm_unreachable("This token kind has no fixed spelling"); #define TOK_PUNCTUATION(NAME, SPELLING) \ case NAME: \ return SPELLING; #define TOK_KEYWORD(SPELLING) \ case kw_##SPELLING: \ return #SPELLING; #include "TokenKinds.def" } } /// Return true if this is one of the keyword token kinds (e.g. kw_if). bool Token::isKeyword() const { switch (kind) { default: return false; #define TOK_KEYWORD(SPELLING) \ case kw_##SPELLING: \ return true; #include "TokenKinds.def" } } bool Token::isCodeCompletionFor(Kind kind) const { if (!isCodeCompletion() || spelling.empty()) return false; switch (kind) { case Kind::string: return spelling[0] == '"'; case Kind::hash_identifier: return spelling[0] == '#'; case Kind::percent_identifier: return spelling[0] == '%'; case Kind::caret_identifier: return spelling[0] == '^'; case Kind::exclamation_identifier: return spelling[0] == '!'; default: return false; } }