clang-tools  9.0.0
SemanticHighlighting.cpp
Go to the documentation of this file.
1 //===--- SemanticHighlighting.cpp - ------------------------- ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SemanticHighlighting.h"
10 #include "Logger.h"
11 #include "Protocol.h"
12 #include "SourceCode.h"
13 #include "clang/AST/ASTContext.h"
14 #include "clang/AST/RecursiveASTVisitor.h"
15 
16 namespace clang {
17 namespace clangd {
18 namespace {
19 
20 // Collects all semantic tokens in an ASTContext.
21 class HighlightingTokenCollector
22  : public RecursiveASTVisitor<HighlightingTokenCollector> {
23  std::vector<HighlightingToken> Tokens;
24  ASTContext &Ctx;
25  const SourceManager &SM;
26 
27 public:
28  HighlightingTokenCollector(ParsedAST &AST)
29  : Ctx(AST.getASTContext()), SM(AST.getSourceManager()) {}
30 
31  std::vector<HighlightingToken> collectTokens() {
32  Tokens.clear();
33  TraverseAST(Ctx);
34  // Initializer lists can give duplicates of tokens, therefore all tokens
35  // must be deduplicated.
36  llvm::sort(Tokens,
37  [](const HighlightingToken &L, const HighlightingToken &R) {
38  return std::tie(L.R, L.Kind) < std::tie(R.R, R.Kind);
39  });
40  auto Last = std::unique(Tokens.begin(), Tokens.end());
41  Tokens.erase(Last, Tokens.end());
42  return Tokens;
43  }
44 
45  bool VisitNamespaceAliasDecl(NamespaceAliasDecl *NAD) {
46  // The target namespace of an alias can not be found in any other way.
47  addToken(NAD->getTargetNameLoc(), HighlightingKind::Namespace);
48  return true;
49  }
50 
51  bool VisitMemberExpr(MemberExpr *ME) {
52  const auto *MD = ME->getMemberDecl();
53  if (isa<CXXDestructorDecl>(MD))
54  // When calling the destructor manually like: AAA::~A(); The ~ is a
55  // MemberExpr. Other methods should still be highlighted though.
56  return true;
57  addToken(ME->getMemberLoc(), MD);
58  return true;
59  }
60 
61  bool VisitNamedDecl(NamedDecl *ND) {
62  // UsingDirectiveDecl's namespaces do not show up anywhere else in the
63  // Visit/Traverse mehods. But they should also be highlighted as a
64  // namespace.
65  if (const auto *UD = dyn_cast<UsingDirectiveDecl>(ND)) {
66  addToken(UD->getIdentLocation(), HighlightingKind::Namespace);
67  return true;
68  }
69 
70  // Constructors' TypeLoc has a TypePtr that is a FunctionProtoType. It has
71  // no tag decl and therefore constructors must be gotten as NamedDecls
72  // instead.
73  if (ND->getDeclName().getNameKind() ==
74  DeclarationName::CXXConstructorName) {
75  addToken(ND->getLocation(), ND);
76  return true;
77  }
78 
79  if (ND->getDeclName().getNameKind() != DeclarationName::Identifier)
80  return true;
81 
82  addToken(ND->getLocation(), ND);
83  return true;
84  }
85 
86  bool VisitDeclRefExpr(DeclRefExpr *Ref) {
87  if (Ref->getNameInfo().getName().getNameKind() !=
89  // Only want to highlight identifiers.
90  return true;
91 
92  addToken(Ref->getLocation(), Ref->getDecl());
93  return true;
94  }
95 
96  bool VisitTypedefNameDecl(TypedefNameDecl *TD) {
97  if(const auto *TSI = TD->getTypeSourceInfo())
98  addTypeLoc(TD->getLocation(), TSI->getTypeLoc());
99  return true;
100  }
101 
102  bool VisitTemplateTypeParmTypeLoc(TemplateTypeParmTypeLoc &TL) {
103  // TemplateTypeParmTypeLoc does not have a TagDecl in its type ptr.
104  addToken(TL.getBeginLoc(), TL.getDecl());
105  return true;
106  }
107 
108  bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc &TL) {
109  if (const TemplateDecl *TD =
110  TL.getTypePtr()->getTemplateName().getAsTemplateDecl())
111  addToken(TL.getBeginLoc(), TD);
112  return true;
113  }
114 
115  bool VisitTypeLoc(TypeLoc &TL) {
116  // This check is for not getting two entries when there are anonymous
117  // structs. It also makes us not highlight certain namespace qualifiers
118  // twice. For elaborated types the actual type is highlighted as an inner
119  // TypeLoc.
120  if (TL.getTypeLocClass() == TypeLoc::TypeLocClass::Elaborated)
121  return true;
122 
123  addTypeLoc(TL.getBeginLoc(), TL);
124  return true;
125  }
126 
127  bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNSLoc) {
128  if (NestedNameSpecifier *NNS = NNSLoc.getNestedNameSpecifier())
129  if (NNS->getKind() == NestedNameSpecifier::Namespace ||
130  NNS->getKind() == NestedNameSpecifier::NamespaceAlias)
131  addToken(NNSLoc.getLocalBeginLoc(), HighlightingKind::Namespace);
132 
133  return RecursiveASTVisitor<
134  HighlightingTokenCollector>::TraverseNestedNameSpecifierLoc(NNSLoc);
135  }
136 
137 private:
138  void addTypeLoc(SourceLocation Loc, const TypeLoc &TL) {
139  if (const Type *TP = TL.getTypePtr())
140  if (const TagDecl *TD = TP->getAsTagDecl())
141  addToken(Loc, TD);
142  }
143 
144  void addToken(SourceLocation Loc, const NamedDecl *D) {
145  if (D->getDeclName().isIdentifier() && D->getName().empty())
146  // Don't add symbols that don't have any length.
147  return;
148  // We highlight class decls, constructor decls and destructor decls as
149  // `Class` type. The destructor decls are handled in `VisitTypeLoc` (we will
150  // visit a TypeLoc where the underlying Type is a CXXRecordDecl).
151  if (isa<ClassTemplateDecl>(D)) {
152  addToken(Loc, HighlightingKind::Class);
153  return;
154  }
155  if (isa<RecordDecl>(D)) {
156  addToken(Loc, HighlightingKind::Class);
157  return;
158  }
159  if (isa<CXXConstructorDecl>(D)) {
160  addToken(Loc, HighlightingKind::Class);
161  return;
162  }
163  if (isa<CXXMethodDecl>(D)) {
164  addToken(Loc, HighlightingKind::Method);
165  return;
166  }
167  if (isa<FieldDecl>(D)) {
168  addToken(Loc, HighlightingKind::Field);
169  return;
170  }
171  if (isa<EnumDecl>(D)) {
172  addToken(Loc, HighlightingKind::Enum);
173  return;
174  }
175  if (isa<EnumConstantDecl>(D)) {
176  addToken(Loc, HighlightingKind::EnumConstant);
177  return;
178  }
179  if (isa<VarDecl>(D)) {
180  addToken(Loc, HighlightingKind::Variable);
181  return;
182  }
183  if (isa<FunctionDecl>(D)) {
184  addToken(Loc, HighlightingKind::Function);
185  return;
186  }
187  if (isa<NamespaceDecl>(D)) {
188  addToken(Loc, HighlightingKind::Namespace);
189  return;
190  }
191  if (isa<NamespaceAliasDecl>(D)) {
192  addToken(Loc, HighlightingKind::Namespace);
193  return;
194  }
195  if (isa<TemplateTemplateParmDecl>(D)) {
197  return;
198  }
199  if (isa<TemplateTypeParmDecl>(D)) {
201  return;
202  }
203  }
204 
205  void addToken(SourceLocation Loc, HighlightingKind Kind) {
206  if (Loc.isMacroID())
207  // FIXME: skip tokens inside macros for now.
208  return;
209 
210  auto R = getTokenRange(SM, Ctx.getLangOpts(), Loc);
211  if (!R) {
212  // R should always have a value, if it doesn't something is very wrong.
213  elog("Tried to add semantic token with an invalid range");
214  return;
215  }
216 
217  Tokens.push_back({Kind, R.getValue()});
218  }
219 };
220 
221 // Encode binary data into base64.
222 // This was copied from compiler-rt/lib/fuzzer/FuzzerUtil.cpp.
223 // FIXME: Factor this out into llvm/Support?
224 std::string encodeBase64(const llvm::SmallVectorImpl<char> &Bytes) {
225  static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
226  "abcdefghijklmnopqrstuvwxyz"
227  "0123456789+/";
228  std::string Res;
229  size_t I;
230  for (I = 0; I + 2 < Bytes.size(); I += 3) {
231  uint32_t X = (Bytes[I] << 16) + (Bytes[I + 1] << 8) + Bytes[I + 2];
232  Res += Table[(X >> 18) & 63];
233  Res += Table[(X >> 12) & 63];
234  Res += Table[(X >> 6) & 63];
235  Res += Table[X & 63];
236  }
237  if (I + 1 == Bytes.size()) {
238  uint32_t X = (Bytes[I] << 16);
239  Res += Table[(X >> 18) & 63];
240  Res += Table[(X >> 12) & 63];
241  Res += "==";
242  } else if (I + 2 == Bytes.size()) {
243  uint32_t X = (Bytes[I] << 16) + (Bytes[I + 1] << 8);
244  Res += Table[(X >> 18) & 63];
245  Res += Table[(X >> 12) & 63];
246  Res += Table[(X >> 6) & 63];
247  Res += "=";
248  }
249  return Res;
250 }
251 
252 void write32be(uint32_t I, llvm::raw_ostream &OS) {
253  std::array<char, 4> Buf;
254  llvm::support::endian::write32be(Buf.data(), I);
255  OS.write(Buf.data(), Buf.size());
256 }
257 
258 void write16be(uint16_t I, llvm::raw_ostream &OS) {
259  std::array<char, 2> Buf;
260  llvm::support::endian::write16be(Buf.data(), I);
261  OS.write(Buf.data(), Buf.size());
262 }
263 } // namespace
264 
265 bool operator==(const HighlightingToken &Lhs, const HighlightingToken &Rhs) {
266  return Lhs.Kind == Rhs.Kind && Lhs.R == Rhs.R;
267 }
268 
269 std::vector<HighlightingToken> getSemanticHighlightings(ParsedAST &AST) {
270  return HighlightingTokenCollector(AST).collectTokens();
271 }
272 
273 std::vector<SemanticHighlightingInformation>
274 toSemanticHighlightingInformation(llvm::ArrayRef<HighlightingToken> Tokens) {
275  if (Tokens.size() == 0)
276  return {};
277 
278  // FIXME: Tokens might be multiple lines long (block comments) in this case
279  // this needs to add multiple lines for those tokens.
280  std::map<int, std::vector<HighlightingToken>> TokenLines;
281  for (const HighlightingToken &Token : Tokens)
282  TokenLines[Token.R.start.line].push_back(Token);
283 
284  std::vector<SemanticHighlightingInformation> Lines;
285  Lines.reserve(TokenLines.size());
286  for (const auto &Line : TokenLines) {
287  llvm::SmallVector<char, 128> LineByteTokens;
288  llvm::raw_svector_ostream OS(LineByteTokens);
289  for (const auto &Token : Line.second) {
290  // Writes the token to LineByteTokens in the byte format specified by the
291  // LSP proposal. Described below.
292  // |<---- 4 bytes ---->|<-- 2 bytes -->|<--- 2 bytes -->|
293  // | character | length | index |
294 
295  write32be(Token.R.start.character, OS);
296  write16be(Token.R.end.character - Token.R.start.character, OS);
297  write16be(static_cast<int>(Token.Kind), OS);
298  }
299 
300  Lines.push_back({Line.first, encodeBase64(LineByteTokens)});
301  }
302 
303  return Lines;
304 }
305 
306 llvm::StringRef toTextMateScope(HighlightingKind Kind) {
307  // FIXME: Add scopes for C and Objective C.
308  switch (Kind) {
310  return "entity.name.function.cpp";
312  return "entity.name.function.method.cpp";
314  return "variable.other.cpp";
316  return "variable.other.field.cpp";
318  return "entity.name.type.class.cpp";
320  return "entity.name.type.enum.cpp";
322  return "variable.other.enummember.cpp";
324  return "entity.name.namespace.cpp";
326  return "entity.name.type.template.cpp";
328  llvm_unreachable("must not pass NumKinds to the function");
329  }
330  llvm_unreachable("unhandled HighlightingKind");
331 }
332 
333 } // namespace clangd
334 } // namespace clang
SourceLocation Loc
'#' location in the include directive
llvm::StringRef toTextMateScope(HighlightingKind Kind)
Converts a HighlightingKind to a corresponding TextMate scope (https://manual.macromates.com/en/language_grammars).
void elog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:56
BindArgumentKind Kind
Context Ctx
bool operator==(const Ref &L, const Ref &R)
Definition: Ref.h:61
StringRef Tokens
const Decl * D
Definition: XRefs.cpp:868
llvm::Optional< Range > getTokenRange(const SourceManager &SM, const LangOptions &LangOpts, SourceLocation TokLoc)
Returns the token range at TokLoc.
Definition: SourceCode.cpp:203
std::vector< SemanticHighlightingInformation > toSemanticHighlightingInformation(llvm::ArrayRef< HighlightingToken > Tokens)
Stores and provides access to parsed AST.
Definition: ClangdUnit.h:73
//===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
static URISchemeRegistry::Add< TestScheme > X(TestScheme::Scheme, "Test schema")
static GeneratorRegistry::Add< MDGenerator > MD(MDGenerator::Format, "Generator for MD output.")
std::vector< HighlightingToken > getSemanticHighlightings(ParsedAST &AST)
NodeType Type
unsigned Lines