aboutsummaryrefslogtreecommitdiff
path: root/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp
blob: 24674a407cb369373a8f971248a0206b4435dd08 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
//===--- RawStringLiteralCheck.cpp - clang-tidy----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "RawStringLiteralCheck.h"
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/StringRef.h"
#include <optional>

using namespace clang::ast_matchers;

namespace clang::tidy::modernize {

namespace {

bool containsEscapes(StringRef HayStack, StringRef Escapes) {
  size_t BackSlash = HayStack.find('\\');
  if (BackSlash == StringRef::npos)
    return false;

  while (BackSlash != StringRef::npos) {
    if (!Escapes.contains(HayStack[BackSlash + 1]))
      return false;
    BackSlash = HayStack.find('\\', BackSlash + 2);
  }

  return true;
}

bool isRawStringLiteral(StringRef Text) {
  // Already a raw string literal if R comes before ".
  const size_t QuotePos = Text.find('"');
  assert(QuotePos != StringRef::npos);
  return (QuotePos > 0) && (Text[QuotePos - 1] == 'R');
}

bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
                               const StringLiteral *Literal,
                               const CharsBitSet &DisallowedChars) {
  // FIXME: Handle L"", u8"", u"" and U"" literals.
  if (!Literal->isOrdinary())
    return false;

  for (const unsigned char C : Literal->getBytes())
    if (DisallowedChars.test(C))
      return false;

  CharSourceRange CharRange = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(Literal->getSourceRange()),
      *Result.SourceManager, Result.Context->getLangOpts());
  StringRef Text = Lexer::getSourceText(CharRange, *Result.SourceManager,
                                        Result.Context->getLangOpts());
  if (Text.empty() || isRawStringLiteral(Text))
    return false;

  return containsEscapes(Text, R"('\"?x01)");
}

bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
  return Bytes.find(Delimiter.empty()
                        ? std::string(R"lit()")lit")
                        : (")" + Delimiter + R"(")")) != StringRef::npos;
}

} // namespace

RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name,
                                             ClangTidyContext *Context)
    : ClangTidyCheck(Name, Context),
      DelimiterStem(Options.get("DelimiterStem", "lit")),
      ReplaceShorterLiterals(Options.get("ReplaceShorterLiterals", false)) {
  // Non-printing characters are disallowed:
  // \007 = \a bell
  // \010 = \b backspace
  // \011 = \t horizontal tab
  // \012 = \n new line
  // \013 = \v vertical tab
  // \014 = \f form feed
  // \015 = \r carriage return
  // \177 = delete
  for (const unsigned char C : StringRef("\000\001\002\003\004\005\006\a"
                                         "\b\t\n\v\f\r\016\017"
                                         "\020\021\022\023\024\025\026\027"
                                         "\030\031\032\033\034\035\036\037"
                                         "\177",
                                         33))
    DisallowedChars.set(C);

  // Non-ASCII are disallowed too.
  for (unsigned int C = 0x80U; C <= 0xFFU; ++C)
    DisallowedChars.set(static_cast<unsigned char>(C));
}

void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
  Options.store(Opts, "DelimiterStem", DelimiterStem);
  Options.store(Opts, "ReplaceShorterLiterals", ReplaceShorterLiterals);
}

void RawStringLiteralCheck::registerMatchers(MatchFinder *Finder) {
  Finder->addMatcher(
      stringLiteral(unless(hasParent(predefinedExpr()))).bind("lit"), this);
}

static std::optional<StringRef>
createUserDefinedSuffix(const StringLiteral *Literal, const SourceManager &SM,
                        const LangOptions &LangOpts) {
  const CharSourceRange TokenRange =
      CharSourceRange::getTokenRange(Literal->getSourceRange());
  Token T;
  if (Lexer::getRawToken(Literal->getBeginLoc(), T, SM, LangOpts))
    return std::nullopt;
  const CharSourceRange CharRange =
      Lexer::makeFileCharRange(TokenRange, SM, LangOpts);
  if (T.hasUDSuffix()) {
    StringRef Text = Lexer::getSourceText(CharRange, SM, LangOpts);
    const size_t UDSuffixPos = Text.find_last_of('"');
    if (UDSuffixPos == StringRef::npos)
      return std::nullopt;
    return Text.slice(UDSuffixPos + 1, Text.size());
  }
  return std::nullopt;
}

static std::string createRawStringLiteral(const StringLiteral *Literal,
                                          const std::string &DelimiterStem,
                                          const SourceManager &SM,
                                          const LangOptions &LangOpts) {
  const StringRef Bytes = Literal->getBytes();
  std::string Delimiter;
  for (int I = 0; containsDelimiter(Bytes, Delimiter); ++I) {
    Delimiter = (I == 0) ? DelimiterStem : DelimiterStem + std::to_string(I);
  }

  std::optional<StringRef> UserDefinedSuffix =
      createUserDefinedSuffix(Literal, SM, LangOpts);

  if (Delimiter.empty())
    return (R"(R"()" + Bytes + R"lit()")lit" + UserDefinedSuffix.value_or(""))
        .str();

  return (R"(R")" + Delimiter + "(" + Bytes + ")" + Delimiter + R"(")" +
          UserDefinedSuffix.value_or(""))
      .str();
}

static bool compareStringLength(StringRef Replacement,
                                const StringLiteral *Literal,
                                const SourceManager &SM,
                                const LangOptions &LangOpts) {
  return Replacement.size() <=
         Lexer::MeasureTokenLength(Literal->getBeginLoc(), SM, LangOpts);
}

void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) {
  const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("lit");
  if (Literal->getBeginLoc().isMacroID())
    return;
  const SourceManager &SM = *Result.SourceManager;
  const LangOptions &LangOpts = getLangOpts();
  if (containsEscapedCharacters(Result, Literal, DisallowedChars)) {
    const std::string Replacement =
        createRawStringLiteral(Literal, DelimiterStem, SM, LangOpts);
    if (ReplaceShorterLiterals ||
        compareStringLength(Replacement, Literal, SM, LangOpts)) {
      diag(Literal->getBeginLoc(),
           "escaped string literal can be written as a raw string literal")
          << FixItHint::CreateReplacement(Literal->getSourceRange(),
                                          Replacement);
    }
  }
}

} // namespace clang::tidy::modernize