clang-tools  10.0.0git
SemanticHighlighting.cpp
Go to the documentation of this file.
1 //===--- SemanticHighlighting.cpp - ------------------------- ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SemanticHighlighting.h"
10 #include "FindTarget.h"
11 #include "Logger.h"
12 #include "ParsedAST.h"
13 #include "Protocol.h"
14 #include "SourceCode.h"
15 #include "clang/AST/ASTContext.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/DeclarationName.h"
19 #include "clang/AST/ExprCXX.h"
20 #include "clang/AST/RecursiveASTVisitor.h"
21 #include "clang/AST/Type.h"
22 #include "clang/AST/TypeLoc.h"
23 #include "clang/Basic/LangOptions.h"
24 #include "clang/Basic/SourceLocation.h"
25 #include "clang/Basic/SourceManager.h"
26 #include "llvm/ADT/None.h"
27 #include "llvm/ADT/Optional.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/Support/Casting.h"
30 #include <algorithm>
31 
32 namespace clang {
33 namespace clangd {
34 namespace {
35 
36 /// Some names are not written in the source code and cannot be highlighted,
37 /// e.g. anonymous classes. This function detects those cases.
38 bool canHighlightName(DeclarationName Name) {
39  if (Name.getNameKind() == DeclarationName::CXXConstructorName ||
40  Name.getNameKind() == DeclarationName::CXXUsingDirective)
41  return true;
42  auto *II = Name.getAsIdentifierInfo();
43  return II && !II->getName().empty();
44 }
45 
46 llvm::Optional<HighlightingKind> kindForType(const Type *TP);
47 llvm::Optional<HighlightingKind> kindForDecl(const NamedDecl *D) {
48  if (auto *USD = dyn_cast<UsingShadowDecl>(D)) {
49  if (auto *Target = USD->getTargetDecl())
50  D = Target;
51  }
52  if (auto *TD = dyn_cast<TemplateDecl>(D)) {
53  if (auto *Templated = TD->getTemplatedDecl())
54  D = Templated;
55  }
56  if (auto *TD = dyn_cast<TypedefNameDecl>(D)) {
57  // We try to highlight typedefs as their underlying type.
58  if (auto K = kindForType(TD->getUnderlyingType().getTypePtrOrNull()))
59  return K;
60  // And fallback to a generic kind if this fails.
62  }
63  // We highlight class decls, constructor decls and destructor decls as
64  // `Class` type. The destructor decls are handled in `VisitTagTypeLoc` (we
65  // will visit a TypeLoc where the underlying Type is a CXXRecordDecl).
66  if (auto *RD = llvm::dyn_cast<RecordDecl>(D)) {
67  // We don't want to highlight lambdas like classes.
68  if (RD->isLambda())
69  return llvm::None;
71  }
72  if (isa<ClassTemplateDecl>(D) || isa<RecordDecl>(D) ||
73  isa<CXXConstructorDecl>(D))
75  if (auto *MD = dyn_cast<CXXMethodDecl>(D))
76  return MD->isStatic() ? HighlightingKind::StaticMethod
78  if (isa<FieldDecl>(D))
80  if (isa<EnumDecl>(D))
82  if (isa<EnumConstantDecl>(D))
84  if (isa<ParmVarDecl>(D))
86  if (auto *VD = dyn_cast<VarDecl>(D))
87  return VD->isStaticDataMember()
89  : VD->isLocalVarDecl() ? HighlightingKind::LocalVariable
91  if (isa<BindingDecl>(D))
93  if (isa<FunctionDecl>(D))
95  if (isa<NamespaceDecl>(D) || isa<NamespaceAliasDecl>(D) ||
96  isa<UsingDirectiveDecl>(D))
98  if (isa<TemplateTemplateParmDecl>(D) || isa<TemplateTypeParmDecl>(D) ||
99  isa<NonTypeTemplateParmDecl>(D))
101  return llvm::None;
102 }
103 llvm::Optional<HighlightingKind> kindForType(const Type *TP) {
104  if (!TP)
105  return llvm::None;
106  if (TP->isBuiltinType()) // Builtins are special, they do not have decls.
108  if (auto *TD = dyn_cast<TemplateTypeParmType>(TP))
109  return kindForDecl(TD->getDecl());
110  if (auto *TD = TP->getAsTagDecl())
111  return kindForDecl(TD);
112  return llvm::None;
113 }
114 
115 llvm::Optional<HighlightingKind> kindForReference(const ReferenceLoc &R) {
116  llvm::Optional<HighlightingKind> Result;
117  for (const NamedDecl *Decl : R.Targets) {
118  if (!canHighlightName(Decl->getDeclName()))
119  return llvm::None;
120  auto Kind = kindForDecl(Decl);
121  if (!Kind || (Result && Kind != Result))
122  return llvm::None;
123  Result = Kind;
124  }
125  return Result;
126 }
127 
128 /// Consumes source locations and maps them to text ranges for highlightings.
129 class HighlightingsBuilder {
130 public:
131  HighlightingsBuilder(const SourceManager &SourceMgr,
132  const LangOptions &LangOpts)
133  : SourceMgr(SourceMgr), LangOpts(LangOpts) {}
134 
135  void addToken(HighlightingToken T) { Tokens.push_back(T); }
136 
137  void addToken(SourceLocation Loc, HighlightingKind Kind) {
138  if (Loc.isInvalid())
139  return;
140  if (Loc.isMacroID()) {
141  // Only intereseted in highlighting arguments in macros (DEF_X(arg)).
142  if (!SourceMgr.isMacroArgExpansion(Loc))
143  return;
144  Loc = SourceMgr.getSpellingLoc(Loc);
145  }
146 
147  // Non top level decls that are included from a header are not filtered by
148  // topLevelDecls. (example: method declarations being included from
149  // another file for a class from another file).
150  // There are also cases with macros where the spelling loc will not be in
151  // the main file and the highlighting would be incorrect.
152  if (!isInsideMainFile(Loc, SourceMgr))
153  return;
154 
155  auto Range = getTokenRange(SourceMgr, LangOpts, Loc);
156  if (!Range) {
157  // R should always have a value, if it doesn't something is very wrong.
158  elog("Tried to add semantic token with an invalid range");
159  return;
160  }
161  Tokens.push_back(HighlightingToken{Kind, *Range});
162  }
163 
164  std::vector<HighlightingToken> collect(ParsedAST &AST) && {
165  // Initializer lists can give duplicates of tokens, therefore all tokens
166  // must be deduplicated.
167  llvm::sort(Tokens);
168  auto Last = std::unique(Tokens.begin(), Tokens.end());
169  Tokens.erase(Last, Tokens.end());
170 
171  // Macros can give tokens that have the same source range but conflicting
172  // kinds. In this case all tokens sharing this source range should be
173  // removed.
174  std::vector<HighlightingToken> NonConflicting;
175  NonConflicting.reserve(Tokens.size());
176  for (ArrayRef<HighlightingToken> TokRef = Tokens; !TokRef.empty();) {
177  ArrayRef<HighlightingToken> Conflicting =
178  TokRef.take_while([&](const HighlightingToken &T) {
179  // TokRef is guaranteed at least one element here because otherwise
180  // this predicate would never fire.
181  return T.R == TokRef.front().R;
182  });
183  // If there is exactly one token with this range it's non conflicting and
184  // should be in the highlightings.
185  if (Conflicting.size() == 1)
186  NonConflicting.push_back(TokRef.front());
187  // TokRef[Conflicting.size()] is the next token with a different range (or
188  // the end of the Tokens).
189  TokRef = TokRef.drop_front(Conflicting.size());
190  }
191  // Add tokens indicating lines skipped by the preprocessor.
192  for (const Range &R : AST.getMacros().SkippedRanges) {
193  // Create one token for each line in the skipped range, so it works
194  // with line-based diffing.
195  assert(R.start.line <= R.end.line);
196  for (int Line = R.start.line; Line < R.end.line; ++Line) {
197  // Don't bother computing the offset for the end of the line, just use
198  // zero. The client will treat this highlighting kind specially, and
199  // highlight the entire line visually (i.e. not just to where the text
200  // on the line ends, but to the end of the screen).
201  NonConflicting.push_back({HighlightingKind::InactiveCode,
202  {Position{Line, 0}, Position{Line, 0}}});
203  }
204  }
205  // Re-sort the tokens because that's what the diffing expects.
206  llvm::sort(NonConflicting);
207  return NonConflicting;
208  }
209 
210 private:
211  const SourceManager &SourceMgr;
212  const LangOptions &LangOpts;
213  std::vector<HighlightingToken> Tokens;
214 };
215 
216 /// Produces highlightings, which are not captured by findExplicitReferences,
217 /// e.g. highlights dependent names and 'auto' as the underlying type.
218 class CollectExtraHighlightings
219  : public RecursiveASTVisitor<CollectExtraHighlightings> {
220 public:
221  CollectExtraHighlightings(HighlightingsBuilder &H) : H(H) {}
222 
223  bool VisitDecltypeTypeLoc(DecltypeTypeLoc L) {
224  if (auto K = kindForType(L.getTypePtr()))
225  H.addToken(L.getBeginLoc(), *K);
226  return true;
227  }
228 
229  bool VisitDeclaratorDecl(DeclaratorDecl *D) {
230  auto *AT = D->getType()->getContainedAutoType();
231  if (!AT)
232  return true;
233  if (auto K = kindForType(AT->getDeducedType().getTypePtrOrNull()))
234  H.addToken(D->getTypeSpecStartLoc(), *K);
235  return true;
236  }
237 
238  bool VisitOverloadExpr(OverloadExpr *E) {
239  if (!E->decls().empty())
240  return true; // handled by findExplicitReferences.
241  H.addToken(E->getNameLoc(), HighlightingKind::DependentName);
242  return true;
243  }
244 
245  bool VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E) {
246  H.addToken(E->getMemberNameInfo().getLoc(),
248  return true;
249  }
250 
251  bool VisitDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *E) {
252  H.addToken(E->getNameInfo().getLoc(), HighlightingKind::DependentName);
253  return true;
254  }
255 
256  bool VisitDependentNameTypeLoc(DependentNameTypeLoc L) {
257  H.addToken(L.getNameLoc(), HighlightingKind::DependentType);
258  return true;
259  }
260 
261  bool VisitDependentTemplateSpecializationTypeLoc(
262  DependentTemplateSpecializationTypeLoc L) {
263  H.addToken(L.getTemplateNameLoc(), HighlightingKind::DependentType);
264  return true;
265  }
266 
267  // findExplicitReferences will walk nested-name-specifiers and
268  // find anything that can be resolved to a Decl. However, non-leaf
269  // components of nested-name-specifiers which are dependent names
270  // (kind "Identifier") cannot be resolved to a decl, so we visit
271  // them here.
272  bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc Q) {
273  if (NestedNameSpecifier *NNS = Q.getNestedNameSpecifier()) {
274  if (NNS->getKind() == NestedNameSpecifier::Identifier)
275  H.addToken(Q.getLocalBeginLoc(), HighlightingKind::DependentType);
276  }
277  return RecursiveASTVisitor::TraverseNestedNameSpecifierLoc(Q);
278  }
279 
280 private:
281  HighlightingsBuilder &H;
282 };
283 
284 // Encode binary data into base64.
285 // This was copied from compiler-rt/lib/fuzzer/FuzzerUtil.cpp.
286 // FIXME: Factor this out into llvm/Support?
287 std::string encodeBase64(const llvm::SmallVectorImpl<char> &Bytes) {
288  static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
289  "abcdefghijklmnopqrstuvwxyz"
290  "0123456789+/";
291  std::string Res;
292  size_t I;
293  for (I = 0; I + 2 < Bytes.size(); I += 3) {
294  uint32_t X = (Bytes[I] << 16) + (Bytes[I + 1] << 8) + Bytes[I + 2];
295  Res += Table[(X >> 18) & 63];
296  Res += Table[(X >> 12) & 63];
297  Res += Table[(X >> 6) & 63];
298  Res += Table[X & 63];
299  }
300  if (I + 1 == Bytes.size()) {
301  uint32_t X = (Bytes[I] << 16);
302  Res += Table[(X >> 18) & 63];
303  Res += Table[(X >> 12) & 63];
304  Res += "==";
305  } else if (I + 2 == Bytes.size()) {
306  uint32_t X = (Bytes[I] << 16) + (Bytes[I + 1] << 8);
307  Res += Table[(X >> 18) & 63];
308  Res += Table[(X >> 12) & 63];
309  Res += Table[(X >> 6) & 63];
310  Res += "=";
311  }
312  return Res;
313 }
314 
315 void write32be(uint32_t I, llvm::raw_ostream &OS) {
316  std::array<char, 4> Buf;
317  llvm::support::endian::write32be(Buf.data(), I);
318  OS.write(Buf.data(), Buf.size());
319 }
320 
321 void write16be(uint16_t I, llvm::raw_ostream &OS) {
322  std::array<char, 2> Buf;
323  llvm::support::endian::write16be(Buf.data(), I);
324  OS.write(Buf.data(), Buf.size());
325 }
326 
327 // Get the highlightings on \c Line where the first entry of line is at \c
328 // StartLineIt. If it is not at \c StartLineIt an empty vector is returned.
329 ArrayRef<HighlightingToken>
330 takeLine(ArrayRef<HighlightingToken> AllTokens,
331  ArrayRef<HighlightingToken>::iterator StartLineIt, int Line) {
332  return ArrayRef<HighlightingToken>(StartLineIt, AllTokens.end())
333  .take_while([Line](const HighlightingToken &Token) {
334  return Token.R.start.line == Line;
335  });
336 }
337 } // namespace
338 
339 std::vector<HighlightingToken> getSemanticHighlightings(ParsedAST &AST) {
340  auto &C = AST.getASTContext();
341  // Add highlightings for AST nodes.
342  HighlightingsBuilder Builder(AST.getSourceManager(), C.getLangOpts());
343  // Highlight 'decltype' and 'auto' as their underlying types.
344  CollectExtraHighlightings(Builder).TraverseAST(C);
345  // Highlight all decls and references coming from the AST.
347  if (auto Kind = kindForReference(R))
348  Builder.addToken(R.NameLoc, *Kind);
349  });
350  // Add highlightings for macro references.
351  for (const auto &SIDToRefs : AST.getMacros().MacroRefs) {
352  for (const auto &M : SIDToRefs.second)
353  Builder.addToken({HighlightingKind::Macro, M});
354  }
355  for (const auto &M : AST.getMacros().UnknownMacros)
356  Builder.addToken({HighlightingKind::Macro, M});
357 
358  return std::move(Builder).collect(AST);
359 }
360 
361 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, HighlightingKind K) {
362  switch (K) {
364  return OS << "Variable";
366  return OS << "LocalVariable";
368  return OS << "Parameter";
370  return OS << "Function";
372  return OS << "Method";
374  return OS << "StaticMethod";
376  return OS << "Field";
378  return OS << "StaticField";
380  return OS << "Class";
382  return OS << "Enum";
384  return OS << "EnumConstant";
386  return OS << "Typedef";
388  return OS << "DependentType";
390  return OS << "DependentName";
392  return OS << "Namespace";
394  return OS << "TemplateParameter";
396  return OS << "Primitive";
398  return OS << "Macro";
400  return OS << "InactiveCode";
401  }
402  llvm_unreachable("invalid HighlightingKind");
403 }
404 
405 std::vector<LineHighlightings>
406 diffHighlightings(ArrayRef<HighlightingToken> New,
407  ArrayRef<HighlightingToken> Old) {
408  assert(std::is_sorted(New.begin(), New.end()) &&
409  "New must be a sorted vector");
410  assert(std::is_sorted(Old.begin(), Old.end()) &&
411  "Old must be a sorted vector");
412 
413  // FIXME: There's an edge case when tokens span multiple lines. If the first
414  // token on the line started on a line above the current one and the rest of
415  // the line is the equal to the previous one than we will remove all
416  // highlights but the ones for the token spanning multiple lines. This means
417  // that when we get into the LSP layer the only highlights that will be
418  // visible are the ones for the token spanning multiple lines.
419  // Example:
420  // EndOfMultilineToken Token Token Token
421  // If "Token Token Token" don't differ from previously the line is
422  // incorrectly removed. Suggestion to fix is to separate any multiline tokens
423  // into one token for every line it covers. This requires reading from the
424  // file buffer to figure out the length of each line though.
425  std::vector<LineHighlightings> DiffedLines;
426  // ArrayRefs to the current line in the highlightings.
427  ArrayRef<HighlightingToken> NewLine(New.begin(),
428  /*length*/ static_cast<size_t>(0));
429  ArrayRef<HighlightingToken> OldLine(Old.begin(),
430  /*length*/ static_cast<size_t>(0));
431  auto NewEnd = New.end();
432  auto OldEnd = Old.end();
433  auto NextLineNumber = [&]() {
434  int NextNew = NewLine.end() != NewEnd ? NewLine.end()->R.start.line
435  : std::numeric_limits<int>::max();
436  int NextOld = OldLine.end() != OldEnd ? OldLine.end()->R.start.line
437  : std::numeric_limits<int>::max();
438  return std::min(NextNew, NextOld);
439  };
440 
441  for (int LineNumber = 0; NewLine.end() < NewEnd || OldLine.end() < OldEnd;
442  LineNumber = NextLineNumber()) {
443  NewLine = takeLine(New, NewLine.end(), LineNumber);
444  OldLine = takeLine(Old, OldLine.end(), LineNumber);
445  if (NewLine != OldLine) {
446  DiffedLines.push_back({LineNumber, NewLine, /*IsInactive=*/false});
447 
448  // Turn a HighlightingKind::InactiveCode token into the IsInactive flag.
449  auto &AddedLine = DiffedLines.back();
450  llvm::erase_if(AddedLine.Tokens, [&](const HighlightingToken &T) {
451  if (T.Kind == HighlightingKind::InactiveCode) {
452  AddedLine.IsInactive = true;
453  return true;
454  }
455  return false;
456  });
457  }
458  }
459 
460  return DiffedLines;
461 }
462 
464  return std::tie(L.R, L.Kind) == std::tie(R.R, R.Kind);
465 }
467  return std::tie(L.R, L.Kind) < std::tie(R.R, R.Kind);
468 }
470  return std::tie(L.Line, L.Tokens) == std::tie(R.Line, R.Tokens);
471 }
472 
473 std::vector<SemanticHighlightingInformation>
474 toSemanticHighlightingInformation(llvm::ArrayRef<LineHighlightings> Tokens) {
475  if (Tokens.size() == 0)
476  return {};
477 
478  // FIXME: Tokens might be multiple lines long (block comments) in this case
479  // this needs to add multiple lines for those tokens.
480  std::vector<SemanticHighlightingInformation> Lines;
481  Lines.reserve(Tokens.size());
482  for (const auto &Line : Tokens) {
483  llvm::SmallVector<char, 128> LineByteTokens;
484  llvm::raw_svector_ostream OS(LineByteTokens);
485  for (const auto &Token : Line.Tokens) {
486  // Writes the token to LineByteTokens in the byte format specified by the
487  // LSP proposal. Described below.
488  // |<---- 4 bytes ---->|<-- 2 bytes -->|<--- 2 bytes -->|
489  // | character | length | index |
490 
491  write32be(Token.R.start.character, OS);
492  write16be(Token.R.end.character - Token.R.start.character, OS);
493  write16be(static_cast<int>(Token.Kind), OS);
494  }
495 
496  Lines.push_back({Line.Line, encodeBase64(LineByteTokens), Line.IsInactive});
497  }
498 
499  return Lines;
500 }
501 
502 llvm::StringRef toTextMateScope(HighlightingKind Kind) {
503  // FIXME: Add scopes for C and Objective C.
504  switch (Kind) {
506  return "entity.name.function.cpp";
508  return "entity.name.function.method.cpp";
510  return "entity.name.function.method.static.cpp";
512  return "variable.other.cpp";
514  return "variable.other.local.cpp";
516  return "variable.parameter.cpp";
518  return "variable.other.field.cpp";
520  return "variable.other.field.static.cpp";
522  return "entity.name.type.class.cpp";
524  return "entity.name.type.enum.cpp";
526  return "variable.other.enummember.cpp";
528  return "entity.name.type.typedef.cpp";
530  return "entity.name.type.dependent.cpp";
532  return "entity.name.other.dependent.cpp";
534  return "entity.name.namespace.cpp";
536  return "entity.name.type.template.cpp";
538  return "storage.type.primitive.cpp";
540  return "entity.name.function.preprocessor.cpp";
542  return "meta.disabled";
543  }
544  llvm_unreachable("unhandled HighlightingKind");
545 }
546 
547 } // namespace clangd
548 } // namespace clang
SourceLocation Loc
'#' location in the include directive
const FunctionDecl * Decl
std::vector< HighlightingToken > Tokens
Information about a reference written in the source code, independent of the actual AST node that thi...
Definition: FindTarget.h:119
bool operator==(const HoverInfo::Param &LHS, const HoverInfo::Param &RHS)
Definition: Hover.h:78
bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM)
Returns true iff Loc is inside the main file.
Definition: SourceCode.cpp:534
llvm::StringRef toTextMateScope(HighlightingKind Kind)
Converts a HighlightingKind to a corresponding TextMate scope (https://manual.macromates.com/en/language_grammars).
Documents should not be synced at all.
void elog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:56
ASTContext & getASTContext()
Note that the returned ast will not contain decls from the preamble that were not deserialized during...
Definition: ParsedAST.cpp:424
BindArgumentKind Kind
std::vector< Range > UnknownMacros
Definition: CollectMacros.h:32
llvm::DenseMap< SymbolID, std::vector< Range > > MacroRefs
Definition: CollectMacros.h:28
std::vector< SemanticHighlightingInformation > toSemanticHighlightingInformation(llvm::ArrayRef< LineHighlightings > Tokens)
Convert to LSP's semantic highlighting information.
SourceLocation NameLoc
Start location of the last name part, i.e. 'foo' in 'ns::foo<int>'.
Definition: FindTarget.h:123
bool operator<(const Ref &L, const Ref &R)
Definition: Ref.h:58
static constexpr llvm::StringLiteral Name
std::vector< LineHighlightings > diffHighlightings(ArrayRef< HighlightingToken > New, ArrayRef< HighlightingToken > Old)
Return a line-by-line diff between two highlightings.
llvm::Optional< Range > getTokenRange(const SourceManager &SM, const LangOptions &LangOpts, SourceLocation TokLoc)
Returns the token range at TokLoc.
Definition: SourceCode.cpp:227
Stores and provides access to parsed AST.
Definition: ParsedAST.h:46
CodeCompletionBuilder Builder
SourceManager & getSourceManager()
Definition: ParsedAST.h:73
const MainFileMacros & getMacros() const
Gets all macro references (definition, expansions) present in the main file, including those in the p...
Definition: ParsedAST.cpp:444
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
static URISchemeRegistry::Add< TestScheme > X(TestScheme::Scheme, "Test schema")
Contains all information about highlightings on a single line.
void findExplicitReferences(const Stmt *S, llvm::function_ref< void(ReferenceLoc)> Out)
Recursively traverse S and report all references explicitly written in the code.
Definition: FindTarget.cpp:850
static GeneratorRegistry::Add< MDGenerator > MD(MDGenerator::Format, "Generator for MD output.")
CharSourceRange Range
SourceRange for the file name.
std::vector< HighlightingToken > getSemanticHighlightings(ParsedAST &AST)
const Expr * E
llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const CodeCompletion &C)
NodeType Type
unsigned Lines