clang-tools  10.0.0git
SymbolCollector.cpp
Go to the documentation of this file.
1 //===--- SymbolCollector.cpp -------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolCollector.h"
10 #include "AST.h"
11 #include "CanonicalIncludes.h"
12 #include "CodeComplete.h"
13 #include "CodeCompletionStrings.h"
14 #include "ExpectedTypes.h"
15 #include "Logger.h"
16 #include "SourceCode.h"
17 #include "SymbolLocation.h"
18 #include "URI.h"
19 #include "index/SymbolID.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/DeclBase.h"
22 #include "clang/AST/DeclCXX.h"
23 #include "clang/AST/DeclTemplate.h"
24 #include "clang/Basic/SourceLocation.h"
25 #include "clang/Basic/SourceManager.h"
26 #include "clang/Basic/Specifiers.h"
27 #include "clang/Index/IndexSymbol.h"
28 #include "clang/Index/IndexingAction.h"
29 #include "clang/Index/USRGeneration.h"
30 #include "clang/Lex/Preprocessor.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/FileSystem.h"
33 #include "llvm/Support/MemoryBuffer.h"
34 #include "llvm/Support/Path.h"
35 
36 namespace clang {
37 namespace clangd {
38 namespace {
39 
40 /// If \p ND is a template specialization, returns the described template.
41 /// Otherwise, returns \p ND.
42 const NamedDecl &getTemplateOrThis(const NamedDecl &ND) {
43  if (auto T = ND.getDescribedTemplate())
44  return *T;
45  return ND;
46 }
47 
48 // Returns a URI of \p Path. Firstly, this makes the \p Path absolute using the
49 // current working directory of the given SourceManager if the Path is not an
50 // absolute path. If failed, this resolves relative paths against \p FallbackDir
51 // to get an absolute path. Then, this tries creating an URI for the absolute
52 // path with schemes specified in \p Opts. This returns an URI with the first
53 // working scheme, if there is any; otherwise, this returns None.
54 //
55 // The Path can be a path relative to the build directory, or retrieved from
56 // the SourceManager.
57 std::string toURI(const SourceManager &SM, llvm::StringRef Path,
58  const SymbolCollector::Options &Opts) {
59  llvm::SmallString<128> AbsolutePath(Path);
60  if (auto File = SM.getFileManager().getFile(Path)) {
61  if (auto CanonPath = getCanonicalPath(*File, SM)) {
62  AbsolutePath = *CanonPath;
63  }
64  }
65  // We don't perform is_absolute check in an else branch because makeAbsolute
66  // might return a relative path on some InMemoryFileSystems.
67  if (!llvm::sys::path::is_absolute(AbsolutePath) && !Opts.FallbackDir.empty())
68  llvm::sys::fs::make_absolute(Opts.FallbackDir, AbsolutePath);
69  llvm::sys::path::remove_dots(AbsolutePath, /*remove_dot_dot=*/true);
70  return URI::create(AbsolutePath).toString();
71 }
72 
// All proto generated headers should start with this line.
// Both the characters and the pointer itself are const, so the banner cannot
// be re-pointed at run time.
static const char *const PROTO_HEADER_COMMENT =
    "// Generated by the protocol buffer compiler. DO NOT EDIT!";
76 
77 // Checks whether the decl is a private symbol in a header generated by
78 // protobuf compiler.
79 // To identify whether a proto header is actually generated by proto compiler,
80 // we check whether it starts with PROTO_HEADER_COMMENT.
81 // FIXME: make filtering extensible when there are more use cases for symbol
82 // filters.
83 bool isPrivateProtoDecl(const NamedDecl &ND) {
84  const auto &SM = ND.getASTContext().getSourceManager();
85  auto Loc = nameLocation(ND, SM);
86  auto FileName = SM.getFilename(Loc);
87  if (!FileName.endswith(".proto.h") && !FileName.endswith(".pb.h"))
88  return false;
89  auto FID = SM.getFileID(Loc);
90  // Double check that this is an actual protobuf header.
91  if (!SM.getBufferData(FID).startswith(PROTO_HEADER_COMMENT))
92  return false;
93 
94  // ND without identifier can be operators.
95  if (ND.getIdentifier() == nullptr)
96  return false;
97  auto Name = ND.getIdentifier()->getName();
98  if (!Name.contains('_'))
99  return false;
100  // Nested proto entities (e.g. Message::Nested) have top-level decls
101  // that shouldn't be used (Message_Nested). Ignore them completely.
102  // The nested entities are dangling type aliases, we may want to reconsider
103  // including them in the future.
104  // For enum constants, SOME_ENUM_CONSTANT is not private and should be
105  // indexed. Outer_INNER is private. This heuristic relies on naming style, it
106  // will include OUTER_INNER and exclude some_enum_constant.
107  // FIXME: the heuristic relies on naming style (i.e. no underscore in
108  // user-defined names) and can be improved.
109  return (ND.getKind() != Decl::EnumConstant) || llvm::any_of(Name, islower);
110 }
111 
112 // We only collect #include paths for symbols that are suitable for global code
113 // completion, except for namespaces since #include path for a namespace is hard
114 // to define.
115 bool shouldCollectIncludePath(index::SymbolKind Kind) {
116  using SK = index::SymbolKind;
117  switch (Kind) {
118  case SK::Macro:
119  case SK::Enum:
120  case SK::Struct:
121  case SK::Class:
122  case SK::Union:
123  case SK::TypeAlias:
124  case SK::Using:
125  case SK::Function:
126  case SK::Variable:
127  case SK::EnumConstant:
128  return true;
129  default:
130  return false;
131  }
132 }
133 
134 // Return the symbol range of the token at \p TokLoc.
135 std::pair<SymbolLocation::Position, SymbolLocation::Position>
136 getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
137  const LangOptions &LangOpts) {
138  auto CreatePosition = [&SM](SourceLocation Loc) {
139  auto LSPLoc = sourceLocToPosition(SM, Loc);
140  SymbolLocation::Position Pos;
141  Pos.setLine(LSPLoc.line);
142  Pos.setColumn(LSPLoc.character);
143  return Pos;
144  };
145 
146  auto TokenLength = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts);
147  return {CreatePosition(TokLoc),
148  CreatePosition(TokLoc.getLocWithOffset(TokenLength))};
149 }
150 
151 // Return the symbol location of the token at \p TokLoc.
152 llvm::Optional<SymbolLocation>
153 getTokenLocation(SourceLocation TokLoc, const SourceManager &SM,
154  const SymbolCollector::Options &Opts,
155  const clang::LangOptions &LangOpts,
156  std::string &FileURIStorage) {
157  auto Path = SM.getFilename(TokLoc);
158  if (Path.empty())
159  return None;
160  FileURIStorage = toURI(SM, Path, Opts);
161  SymbolLocation Result;
162  Result.FileURI = FileURIStorage.c_str();
163  auto Range = getTokenRange(TokLoc, SM, LangOpts);
164  Result.Start = Range.first;
165  Result.End = Range.second;
166 
167  return Result;
168 }
169 
170 // Checks whether \p ND is a definition of a TagDecl (class/struct/enum/union)
171 // in a header file, in which case clangd would prefer to use ND as a canonical
172 // declaration.
173 // FIXME: handle symbol types that are not TagDecl (e.g. functions), if using
174 // the first seen declaration as canonical declaration is not a good enough
175 // heuristic.
176 bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) {
177  const auto &SM = ND.getASTContext().getSourceManager();
178  return (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) &&
179  isa<TagDecl>(&ND) && !isInsideMainFile(ND.getLocation(), SM);
180 }
181 
182 RefKind toRefKind(index::SymbolRoleSet Roles) {
183  return static_cast<RefKind>(static_cast<unsigned>(RefKind::All) & Roles);
184 }
185 
186 bool shouldIndexRelation(const index::SymbolRelation &R) {
187  // We currently only index BaseOf relations, for type hierarchy subtypes.
188  return R.Roles & static_cast<unsigned>(index::SymbolRole::RelationBaseOf);
189 }
190 
191 } // namespace
192 
// Creates a collector that stores \p Opts (moved into the Opts member).
193 SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}
194 
195 void SymbolCollector::initialize(ASTContext &Ctx) {
196  ASTCtx = &Ctx;
197  CompletionAllocator = std::make_shared<GlobalCodeCompletionAllocator>();
198  CompletionTUInfo =
199  std::make_unique<CodeCompletionTUInfo>(CompletionAllocator);
200 }
201 
202 bool SymbolCollector::shouldCollectSymbol(const NamedDecl &ND,
203  const ASTContext &ASTCtx,
204  const Options &Opts,
205  bool IsMainFileOnly) {
206  // Skip anonymous declarations, e.g (anonymous enum/class/struct).
207  if (ND.getDeclName().isEmpty())
208  return false;
209 
210  // Skip main-file symbols if we are not collecting them.
211  if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
212  return false;
213 
214  // Skip symbols in anonymous namespaces in header files.
215  if (!IsMainFileOnly && ND.isInAnonymousNamespace())
216  return false;
217 
218  // We want most things but not "local" symbols such as symbols inside
219  // FunctionDecl, BlockDecl, ObjCMethodDecl and OMPDeclareReductionDecl.
220  // FIXME: Need a matcher for ExportDecl in order to include symbols declared
221  // within an export.
222  const auto *DeclCtx = ND.getDeclContext();
223  switch (DeclCtx->getDeclKind()) {
224  case Decl::TranslationUnit:
225  case Decl::Namespace:
226  case Decl::LinkageSpec:
227  case Decl::Enum:
228  case Decl::ObjCProtocol:
229  case Decl::ObjCInterface:
230  case Decl::ObjCCategory:
231  case Decl::ObjCCategoryImpl:
232  case Decl::ObjCImplementation:
233  break;
234  default:
235  // Record has a few derivations (e.g. CXXRecord, Class specialization), it's
236  // easier to cast.
237  if (!isa<RecordDecl>(DeclCtx))
238  return false;
239  }
240 
241  // Avoid indexing internal symbols in protobuf generated headers.
242  if (isPrivateProtoDecl(ND))
243  return false;
244  return true;
245 }
246 
247 // Always return true to continue indexing.
249  const Decl *D, index::SymbolRoleSet Roles,
250  llvm::ArrayRef<index::SymbolRelation> Relations, SourceLocation Loc,
251  index::IndexDataConsumer::ASTNodeInfo ASTNode) {
252  assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
253  assert(CompletionAllocator && CompletionTUInfo);
254  assert(ASTNode.OrigD);
255  // Indexing API puts canonical decl into D, which might not have a valid
256  // source location for implicit/built-in decls. Fallback to original decl in
257  // such cases.
258  if (D->getLocation().isInvalid())
259  D = ASTNode.OrigD;
260 // If OrigD is a declaration associated with a friend declaration and it's
261  // not a definition, skip it. Note that OrigD is the occurrence that the
262  // collector is currently visiting.
263  if ((ASTNode.OrigD->getFriendObjectKind() !=
264  Decl::FriendObjectKind::FOK_None) &&
265  !(Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
266  return true;
267  // A declaration created for a friend declaration should not be used as the
268  // canonical declaration in the index. Use OrigD instead, unless we've already
269  // picked a replacement for D
270  if (D->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None)
271  D = CanonicalDecls.try_emplace(D, ASTNode.OrigD).first->second;
272  const NamedDecl *ND = dyn_cast<NamedDecl>(D);
273  if (!ND)
274  return true;
275 
276  // Mark D as referenced if this is a reference coming from the main file.
277  // D may not be an interesting symbol, but it's cheaper to check at the end.
278  auto &SM = ASTCtx->getSourceManager();
279  if (Opts.CountReferences &&
280  (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
281  SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID())
282  ReferencedDecls.insert(ND);
283 
284  auto ID = getSymbolID(ND);
285  if (!ID)
286  return true;
287 
288  // Note: we need to process relations for all decl occurrences, including
289  // refs, because the indexing code only populates relations for specific
290  // occurrences. For example, RelationBaseOf is only populated for the
291  // occurrence inside the base-specifier.
292  processRelations(*ND, *ID, Relations);
293 
294  bool CollectRef = static_cast<unsigned>(Opts.RefFilter) & Roles;
295  bool IsOnlyRef =
296  !(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) |
297  static_cast<unsigned>(index::SymbolRole::Definition)));
298 
299  if (IsOnlyRef && !CollectRef)
300  return true;
301 
302  // ND is the canonical (i.e. first) declaration. If it's in the main file
303  // (which is not a header), then no public declaration was visible, so assume
304  // it's main-file only.
305  bool IsMainFileOnly =
306  SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())) &&
307  !isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())->getName(),
308  ASTCtx->getLangOpts());
309  // In C, printf is a redecl of an implicit builtin! So check OrigD instead.
310  if (ASTNode.OrigD->isImplicit() ||
311  !shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly))
312  return true;
313  // Do not store references to main-file symbols.
314  // Unlike other fields, e.g. Symbols (which use spelling locations), we use
315  // file locations for references (as it aligns the behavior of clangd's
316  // AST-based xref).
317  // FIXME: we should try to use the file locations for other fields.
318  if (CollectRef && !IsMainFileOnly && !isa<NamespaceDecl>(ND) &&
319  (Opts.RefsInHeaders ||
320  SM.getFileID(SM.getFileLoc(Loc)) == SM.getMainFileID()))
321  DeclRefs[ND].emplace_back(SM.getFileLoc(Loc), Roles);
322  // Don't continue indexing if this is a mere reference.
323  if (IsOnlyRef)
324  return true;
325 
326  // FIXME: ObjCPropertyDecl are not properly indexed here:
327  // - ObjCPropertyDecl may have an OrigD of ObjCPropertyImplDecl, which is
328  // not a NamedDecl.
329  auto *OriginalDecl = dyn_cast<NamedDecl>(ASTNode.OrigD);
330  if (!OriginalDecl)
331  return true;
332 
333  const Symbol *BasicSymbol = Symbols.find(*ID);
334  if (!BasicSymbol) // Regardless of role, ND is the canonical declaration.
335  BasicSymbol = addDeclaration(*ND, std::move(*ID), IsMainFileOnly);
336  else if (isPreferredDeclaration(*OriginalDecl, Roles))
337  // If OriginalDecl is preferred, replace the existing canonical
338  // declaration (e.g. a class forward declaration). There should be at most
339  // one duplicate as we expect to see only one preferred declaration per
340  // TU, because in practice they are definitions.
341  BasicSymbol = addDeclaration(*OriginalDecl, std::move(*ID), IsMainFileOnly);
342 
343  if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
344  addDefinition(*OriginalDecl, *BasicSymbol);
345 
346  return true;
347 }
348 
// Inserts one Ref into Refs for every range recorded in
// MacroRefsToIndex.MacroRefs, keyed by the entry's first member. Each Ref's
// FileURI points at the URI computed for the main file.
// Requires the preprocessor to be set and the main file to have a file entry
// (both are asserted).
// NOTE(review): the embedded listing numbers jump 359 -> 364 and 365 -> 367, so
// statements of the inner loop body appear to be missing from this copy
// (nothing visible here reads Range) — confirm against the upstream file.
349 void SymbolCollector::handleMacros(const MainFileMacros &MacroRefsToIndex) {
350  assert(PP.get());
351  const auto &SM = PP->getSourceManager();
352  const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
353  assert(MainFileEntry);
354 
// MainFileURI must stay alive while R is inserted: R only stores a pointer to
// its characters.
355  const auto MainFileURI = toURI(SM, MainFileEntry->getName(), Opts);
356  // Add macro references.
357  for (const auto &IDToRefs : MacroRefsToIndex.MacroRefs) {
358  for (const auto &Range : IDToRefs.second) {
359  Ref R;
364  R.Location.FileURI = MainFileURI.c_str();
365  // FIXME: Add correct RefKind information to MainFileMacros.
367  Refs.insert(IDToRefs.first, R);
368  }
369  }
370 }
371 
// Handles one occurrence of the macro \p Name (described by \p MI) at \p Loc
// with the given \p Roles.
// In order, this:
//  - ignores builtin macros and macros defined in the builtin file;
//  - records the occurrence in MacroRefs when it passes Opts.RefFilter, the
//    macro is not defined in the main file, and the occurrence is in the main
//    file (or Opts.RefsInHeaders is set);
//  - when Opts.CollectMacro is set, remembers main-file references in
//    ReferencedMacros and, for declarations/definitions not yet in Symbols,
//    builds and inserts a Symbol for the macro.
// NOTE(review): the embedded listing numbers jump 429 -> 432 and 447 -> 449, so
// the body of the `if (!IsMainFileSymbol)` block and one statement after
// `S.Signature = Signature;` appear to be missing from this copy — confirm
// against the upstream file.
372 bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name,
373  const MacroInfo *MI,
374  index::SymbolRoleSet Roles,
375  SourceLocation Loc) {
376  assert(PP.get());
377 
378  const auto &SM = PP->getSourceManager();
379  auto DefLoc = MI->getDefinitionLoc();
380  auto SpellingLoc = SM.getSpellingLoc(Loc);
381  bool IsMainFileSymbol = SM.isInMainFile(SM.getExpansionLoc(DefLoc));
382 
383  // Builtin macros don't have useful locations and aren't needed in completion.
384  if (MI->isBuiltinMacro())
385  return true;
386 
387  // Also avoid storing predefined macros like __DBL_MIN__.
388  if (SM.isWrittenInBuiltinFile(DefLoc))
389  return true;
390 
391  auto ID = getSymbolID(Name->getName(), MI, SM);
392  if (!ID)
393  return true;
394 
395  // Do not store references to main-file macros.
396  if ((static_cast<unsigned>(Opts.RefFilter) & Roles) && !IsMainFileSymbol &&
397  (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID()))
398  MacroRefs[*ID].push_back({Loc, Roles});
399 
400  // Collect symbols.
401  if (!Opts.CollectMacro)
402  return true;
403 
404  // Skip main-file macros if we are not collecting them.
// NOTE(review): this is the only exit in this function that returns false;
// every other early exit returns true. Confirm that false is intended here.
405  if (IsMainFileSymbol && !Opts.CollectMainFileSymbols)
406  return false;
407 
408  // Mark the macro as referenced if this is a reference coming from the main
409  // file. The macro may not be an interesting symbol, but it's cheaper to check
410  // at the end.
411  if (Opts.CountReferences &&
412  (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
413  SM.getFileID(SpellingLoc) == SM.getMainFileID())
414  ReferencedMacros.insert(Name);
415 
416  // Don't continue indexing if this is a mere reference.
417  // FIXME: remove macro with ID if it is undefined.
418  if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) ||
419  Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
420  return true;
421 
422  // Only collect one instance in case there are multiple.
423  if (Symbols.find(*ID) != nullptr)
424  return true;
425 
426  Symbol S;
427  S.ID = std::move(*ID);
428  S.Name = Name->getName();
429  if (!IsMainFileSymbol) {
432  }
433  S.SymInfo = index::getSymbolInfoForMacro(*MI);
// FileURI backs S.CanonicalDeclaration.FileURI, so it has to outlive the
// Symbols.insert(S) call below.
434  std::string FileURI;
435  // FIXME: use the result to filter out symbols.
436  shouldIndexFile(SM.getFileID(Loc));
437  if (auto DeclLoc =
438  getTokenLocation(DefLoc, SM, Opts, PP->getLangOpts(), FileURI))
439  S.CanonicalDeclaration = *DeclLoc;
440 
// The completion string is built only to derive the macro's signature.
441  CodeCompletionResult SymbolCompletion(Name);
442  const auto *CCS = SymbolCompletion.CreateCodeCompletionStringForMacro(
443  *PP, *CompletionAllocator, *CompletionTUInfo);
444  std::string Signature;
445  std::string SnippetSuffix;
446  getSignature(*CCS, &Signature, &SnippetSuffix);
447  S.Signature = Signature;
449 
450  IndexedMacros.insert(Name);
451  setIncludeLocation(S, DefLoc);
452  Symbols.insert(S);
453  return true;
454 }
455 
456 void SymbolCollector::processRelations(
457  const NamedDecl &ND, const SymbolID &ID,
458  ArrayRef<index::SymbolRelation> Relations) {
459  // Store subtype relations.
460  if (!dyn_cast<TagDecl>(&ND))
461  return;
462 
463  for (const auto &R : Relations) {
464  if (!shouldIndexRelation(R))
465  continue;
466 
467  const Decl *Object = R.RelatedSymbol;
468 
469  auto ObjectID = getSymbolID(Object);
470  if (!ObjectID)
471  continue;
472 
473  // Record the relation.
474  // TODO: There may be cases where the object decl is not indexed for some
475  // reason. Those cases should probably be removed in due course, but for
476  // now there are two possible ways to handle it:
477  // (A) Avoid storing the relation in such cases.
478  // (B) Store it anyways. Clients will likely lookup() the SymbolID
479  // in the index and find nothing, but that's a situation they
480  // probably need to handle for other reasons anyways.
481  // We currently do (B) because it's simpler.
482  this->Relations.insert(Relation{ID, RelationKind::BaseOf, *ObjectID});
483  }
484 }
485 
486 void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation Loc) {
487  if (Opts.CollectIncludePath)
488  if (shouldCollectIncludePath(S.SymInfo.Kind))
489  // Use the expansion location to get the #include header since this is
490  // where the symbol is exposed.
491  IncludeFiles[S.ID] =
492  PP->getSourceManager().getDecomposedExpansionLoc(Loc).first;
493 }
494 
496  // At the end of the TU, add 1 to the refcount of all referenced symbols.
497  auto IncRef = [this](const SymbolID &ID) {
498  if (const auto *S = Symbols.find(ID)) {
499  Symbol Inc = *S;
500  ++Inc.References;
501  Symbols.insert(Inc);
502  }
503  };
504  for (const NamedDecl *ND : ReferencedDecls) {
505  if (auto ID = getSymbolID(ND)) {
506  IncRef(*ID);
507  }
508  }
509  if (Opts.CollectMacro) {
510  assert(PP);
511  // First, drop header guards. We can't identify these until EOF.
512  for (const IdentifierInfo *II : IndexedMacros) {
513  if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
514  if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
515  if (MI->isUsedForHeaderGuard())
516  Symbols.erase(*ID);
517  }
518  // Now increment refcounts.
519  for (const IdentifierInfo *II : ReferencedMacros) {
520  if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
521  if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
522  IncRef(*ID);
523  }
524  }
525  // Fill in IncludeHeaders.
526  // We delay this until end of TU so header guards are all resolved.
527 // Symbols in slabs aren't mutable, so insert() has to walk all the strings
528 // :-(
529  llvm::SmallString<256> QName;
530  for (const auto &Entry : IncludeFiles)
531  if (const Symbol *S = Symbols.find(Entry.first)) {
532  QName = S->Scope;
533  QName.append(S->Name);
534  if (auto Header = getIncludeHeader(QName, Entry.second)) {
535  Symbol NewSym = *S;
536  NewSym.IncludeHeaders.push_back({*Header, 1});
537  Symbols.insert(NewSym);
538  }
539  }
540 
541  const auto &SM = ASTCtx->getSourceManager();
542  llvm::DenseMap<FileID, std::string> URICache;
543  auto GetURI = [&](FileID FID) -> llvm::Optional<std::string> {
544  auto Found = URICache.find(FID);
545  if (Found == URICache.end()) {
546  if (auto *FileEntry = SM.getFileEntryForID(FID)) {
547  auto FileURI = toURI(SM, FileEntry->getName(), Opts);
548  Found = URICache.insert({FID, FileURI}).first;
549  } else {
550  // Ignore cases where we can not find a corresponding file entry for
551  // given location, e.g. symbols formed via macro concatenation.
552  return None;
553  }
554  }
555  return Found->second;
556  };
557  auto CollectRef =
558  [&](SymbolID ID,
559  const std::pair<SourceLocation, index::SymbolRoleSet> &LocAndRole) {
560  auto FileID = SM.getFileID(LocAndRole.first);
561  // FIXME: use the result to filter out references.
562  shouldIndexFile(FileID);
563  if (auto FileURI = GetURI(FileID)) {
564  auto Range =
565  getTokenRange(LocAndRole.first, SM, ASTCtx->getLangOpts());
566  Ref R;
567  R.Location.Start = Range.first;
568  R.Location.End = Range.second;
569  R.Location.FileURI = FileURI->c_str();
570  R.Kind = toRefKind(LocAndRole.second);
571  Refs.insert(ID, R);
572  }
573  };
574  // Populate Refs slab from MacroRefs.
575  for (const auto &IDAndRefs : MacroRefs) {
576  for (const auto &LocAndRole : IDAndRefs.second)
577  CollectRef(IDAndRefs.first, LocAndRole);
578  }
579  // Populate Refs slab from DeclRefs.
580  if (auto MainFileURI = GetURI(SM.getMainFileID())) {
581  for (const auto &It : DeclRefs) {
582  if (auto ID = getSymbolID(It.first)) {
583  for (const auto &LocAndRole : It.second)
584  CollectRef(*ID, LocAndRole);
585  }
586  }
587  }
588 
589  ReferencedDecls.clear();
590  ReferencedMacros.clear();
591  DeclRefs.clear();
592  FilesToIndexCache.clear();
593  HeaderIsSelfContainedCache.clear();
594  IncludeFiles.clear();
595 }
596 
// Builds a Symbol for \p ND under \p ID, inserts it into Symbols and returns
// the stored copy (looked up again by ID after the insert).
// The symbol gets its scope/name from the printed qualified name, its template
// specialization arguments, SymInfo, Origin, a canonical declaration location
// derived from nameLocation(), and completion-derived data (documentation,
// signature, type) computed from a code completion string.
// NOTE(review): the embedded listing numbers skip 613, 615, 617, 630, 644,
// 655, 657 and 660, so several statements (the bodies of some `if`s, and the
// opening of at least one block that is closed below) appear to be missing
// from this copy. The control flow shown here is therefore not reliable —
// confirm against the upstream file.
597 const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID,
598  bool IsMainFileOnly) {
599  auto &Ctx = ND.getASTContext();
600  auto &SM = Ctx.getSourceManager();
601 
602  Symbol S;
603  S.ID = std::move(ID);
// QName owns the text that S.Scope and S.Name are assigned from below.
604  std::string QName = printQualifiedName(ND);
605  // FIXME: this returns foo:bar: for objective-C methods, we prefer only foo:
606  // for consistency with CodeCompletionString and a clean name/signature split.
607  std::tie(S.Scope, S.Name) = splitQualifiedName(QName);
608  std::string TemplateSpecializationArgs = printTemplateSpecializationArgs(ND);
609  S.TemplateSpecializationArgs = TemplateSpecializationArgs;
610 
611  // We collect main-file symbols, but do not use them for code completion.
612  if (!IsMainFileOnly && isIndexedForCodeCompletion(ND, Ctx))
614  if (isImplementationDetail(&ND))
616  if (!IsMainFileOnly)
618  S.SymInfo = index::getSymbolInfo(&ND);
// FileURI backs S.CanonicalDeclaration.FileURI until the symbol is inserted.
619  std::string FileURI;
620  auto Loc = nameLocation(ND, SM);
621  assert(Loc.isValid() && "Invalid source location for NamedDecl");
622  // FIXME: use the result to filter out symbols.
623  shouldIndexFile(SM.getFileID(Loc));
624  if (auto DeclLoc =
625  getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
626  S.CanonicalDeclaration = *DeclLoc;
627 
628  S.Origin = Opts.Origin;
629  if (ND.getAvailability() == AR_Deprecated)
631 
632  // Add completion info.
633  // FIXME: we may want to choose a different redecl, or combine from several.
634  assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
635  // We use the primary template, as clang does during code completion.
636  CodeCompletionResult SymbolCompletion(&getTemplateOrThis(ND), 0);
637  const auto *CCS = SymbolCompletion.CreateCodeCompletionString(
638  *ASTCtx, *PP, CodeCompletionContext::CCC_Symbol, *CompletionAllocator,
639  *CompletionTUInfo,
640  /*IncludeBriefComments*/ false);
641  std::string Documentation =
642  formatDocumentation(*CCS, getDocComment(Ctx, SymbolCompletion,
643  /*CommentsFromHeaders=*/true));
// NOTE(review): the condition that opens the block closed at listing line 649
// is not visible in this copy (listing line 644 is missing).
645  if (Opts.StoreAllDocumentation)
646  S.Documentation = Documentation;
647  Symbols.insert(S);
648  return Symbols.find(S.ID);
649  }
650  S.Documentation = Documentation;
651  std::string Signature;
652  std::string SnippetSuffix;
653  getSignature(*CCS, &Signature, &SnippetSuffix);
654  S.Signature = Signature;
656  std::string ReturnType = getReturnType(*CCS);
658 
// TypeStorage keeps the OpaqueType alive while S.Type refers to its raw form.
659  llvm::Optional<OpaqueType> TypeStorage;
661  TypeStorage = OpaqueType::fromCompletionResult(*ASTCtx, SymbolCompletion);
662  if (TypeStorage)
663  S.Type = TypeStorage->raw();
664  }
665 
666  Symbols.insert(S);
667  setIncludeLocation(S, ND.getLocation());
668  return Symbols.find(S.ID);
669 }
670 
671 void SymbolCollector::addDefinition(const NamedDecl &ND,
672  const Symbol &DeclSym) {
673  if (DeclSym.Definition)
674  return;
675  // If we saw some forward declaration, we end up copying the symbol.
676  // This is not ideal, but avoids duplicating the "is this a definition" check
677  // in clang::index. We should only see one definition.
678  Symbol S = DeclSym;
679  std::string FileURI;
680  const auto &SM = ND.getASTContext().getSourceManager();
681  auto Loc = nameLocation(ND, SM);
682  // FIXME: use the result to filter out symbols.
683  shouldIndexFile(SM.getFileID(Loc));
684  if (auto DefLoc =
685  getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
686  S.Definition = *DefLoc;
687  Symbols.insert(S);
688 }
689 
690 /// Gets a canonical include (URI of the header or <header> or "header") for
691 /// header of \p FID (which should usually be the *expansion* file).
692 /// Returns None if includes should not be inserted for this file.
693 llvm::Optional<std::string>
694 SymbolCollector::getIncludeHeader(llvm::StringRef QName, FileID FID) {
695  const SourceManager &SM = ASTCtx->getSourceManager();
696  const FileEntry *FE = SM.getFileEntryForID(FID);
697  if (!FE || FE->getName().empty())
698  return llvm::None;
699  llvm::StringRef Filename = FE->getName();
700  // If a file is mapped by canonical headers, use that mapping, regardless
701  // of whether it's an otherwise-good header (header guards etc).
702  if (Opts.Includes) {
703  llvm::StringRef Canonical = Opts.Includes->mapHeader(Filename, QName);
704  // If we had a mapping, always use it.
705  if (Canonical.startswith("<") || Canonical.startswith("\""))
706  return Canonical.str();
707  if (Canonical != Filename)
708  return toURI(SM, Canonical, Opts);
709  }
710  if (!isSelfContainedHeader(FID)) {
711  // A .inc or .def file is often included into a real header to define
712  // symbols (e.g. LLVM tablegen files).
713  if (Filename.endswith(".inc") || Filename.endswith(".def"))
714  return getIncludeHeader(QName, SM.getFileID(SM.getIncludeLoc(FID)));
715  // Conservatively refuse to insert #includes to files without guards.
716  return llvm::None;
717  }
718  // Standard case: just insert the file itself.
719  return toURI(SM, Filename, Opts);
720 }
721 
722 bool SymbolCollector::isSelfContainedHeader(FileID FID) {
723  // The real computation (which will be memoized).
724  auto Compute = [&] {
725  const SourceManager &SM = ASTCtx->getSourceManager();
726  const FileEntry *FE = SM.getFileEntryForID(FID);
727  if (!FE)
728  return false;
729  if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE))
730  return false;
731  // This pattern indicates that a header can't be used without
732  // particular preprocessor state, usually set up by another header.
733  if (isDontIncludeMeHeader(SM.getBufferData(FID)))
734  return false;
735  return true;
736  };
737 
738  auto R = HeaderIsSelfContainedCache.try_emplace(FID, false);
739  if (R.second)
740  R.first->second = Compute();
741  return R.first->second;
742 }
743 
744 // Is Line an #if or #ifdef directive?
745 static bool isIf(llvm::StringRef Line) {
746  Line = Line.ltrim();
747  if (!Line.consume_front("#"))
748  return false;
749  Line = Line.ltrim();
750  return Line.startswith("if");
751 }
752 // Is Line an #error directive mentioning includes?
753 static bool isErrorAboutInclude(llvm::StringRef Line) {
754  Line = Line.ltrim();
755  if (!Line.consume_front("#"))
756  return false;
757  Line = Line.ltrim();
758  if (!Line.startswith("error"))
759  return false;
760  return Line.contains_lower("includ"); // Matches "include" or "including".
761 }
762 
763 bool SymbolCollector::isDontIncludeMeHeader(llvm::StringRef Content) {
764  llvm::StringRef Line;
765  // Only sniff up to 100 lines or 10KB.
766  Content = Content.take_front(100 * 100);
767  for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
768  std::tie(Line, Content) = Content.split('\n');
769  if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))
770  return true;
771  }
772  return false;
773 }
774 
776  if (!Opts.FileFilter)
777  return true;
778  auto I = FilesToIndexCache.try_emplace(FID);
779  if (I.second)
780  I.first->second = Opts.FileFilter(ASTCtx->getSourceManager(), FID);
781  return I.first->second;
782 }
783 
784 } // namespace clangd
785 } // namespace clang
SourceLocation Loc
'#' location in the include directive
bool handleMacroOccurrence(const IdentifierInfo *Name, const MacroInfo *MI, index::SymbolRoleSet Roles, SourceLocation Loc) override
const FunctionDecl * Decl
std::string printQualifiedName(const NamedDecl &ND)
Returns the qualified name of ND.
Definition: AST.cpp:169
SourceLocation nameLocation(const clang::Decl &D, const SourceManager &SM)
Find the source location of the identifier for D.
Definition: AST.cpp:162
Position start
The range's start position.
Definition: Protocol.h:158
Represents a relation between two symbols.
Definition: Relation.h:29
llvm::Optional< SymbolID > getSymbolID(const Decl *D)
Gets the symbol ID for a declaration, if possible.
Definition: AST.cpp:286
const Symbol * find(const SymbolID &ID)
Returns the symbol with an ID, if it exists. Valid until insert/remove.
Definition: Symbol.h:211
clang::find_all_symbols::SymbolInfo::SymbolKind SymbolKind
Definition: SymbolInfo.cpp:21
std::pair< StringRef, StringRef > splitQualifiedName(StringRef QName)
Definition: SourceCode.cpp:599
bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM)
Returns true iff Loc is inside the main file.
Definition: SourceCode.cpp:534
Represents a symbol occurrence in the source file.
Definition: Ref.h:52
Symbol is visible to other files (not e.g. a static helper function).
Definition: Symbol.h:125
void insert(const Symbol &S)
Adds a symbol, overwriting any existing one with the same ID.
Definition: Symbol.cpp:50
def make_absolute(f, directory)
llvm::StringRef Scope
The containing namespace. e.g. "" (global), "ns::" (top-level namespace).
Definition: Symbol.h:44
std::string getDocComment(const ASTContext &Ctx, const CodeCompletionResult &Result, bool CommentsFromHeaders)
Gets a minimally formatted documentation comment of Result, with comment markers stripped.
Documents should not be synced at all.
std::string printTemplateSpecializationArgs(const NamedDecl &ND)
Prints template arguments of a decl as written in the source code, including enclosing '<' and '>'...
Definition: AST.cpp:249
void erase(const SymbolID &ID)
Removes the symbol with an ID, if it exists.
Definition: Symbol.h:208
void initialize(ASTContext &Ctx) override
unsigned References
The number of translation units that reference this symbol from their main file.
Definition: Symbol.h:59
SymbolID ID
The ID of the symbol.
Definition: Symbol.h:38
index::SymbolInfo SymInfo
The symbol information, like symbol kind.
Definition: Symbol.h:40
std::string getReturnType(const CodeCompletionString &CCS)
Gets detail to be used as the detail field in an LSP completion item.
BindArgumentKind Kind
llvm::DenseMap< SymbolID, std::vector< Range > > MacroRefs
Definition: CollectMacros.h:28
Symbol is an implementation detail.
Definition: Symbol.h:123
SymbolLocation Definition
The location of the symbol's definition, if one was found.
Definition: Symbol.h:47
Context Ctx
std::vector< SymbolDetails > getSymbolInfo(ParsedAST &AST, Position Pos)
Get info about symbols at Pos.
Definition: XRefs.cpp:519
std::string QName
std::string Filename
Filename as a string.
Whether or not this symbol is meant to be used for the code completion.
Definition: Symbol.h:119
bool isIndexedForCodeCompletion(const NamedDecl &ND, ASTContext &ASTCtx)
llvm::SmallVector< IncludeHeaderWithReferences, 1 > IncludeHeaders
One Symbol can potentially be included via different headers.
Definition: Symbol.h:111
SymbolFlag Flags
Definition: Symbol.h:128
static bool isErrorAboutInclude(llvm::StringRef Line)
llvm::StringRef Signature
A brief description of the symbol that can be appended in the completion candidate list...
Definition: Symbol.h:65
SymbolLocation Location
The source location where the symbol is named.
Definition: Ref.h:54
std::string Signature
llvm::StringRef Documentation
Documentation including comment for the symbol declaration.
Definition: Symbol.h:76
std::string Path
A typedef to represent a file path.
Definition: Path.h:20
static constexpr llvm::StringLiteral Name
SymbolLocation CanonicalDeclaration
The location of the preferred declaration of the symbol.
Definition: Symbol.h:56
RefKind
Describes the kind of a cross-reference.
Definition: Ref.h:28
PathRef FileName
std::string formatDocumentation(const CodeCompletionString &CCS, llvm::StringRef DocComment)
Assembles formatted documentation for a completion result.
llvm::Optional< Range > getTokenRange(const SourceManager &SM, const LangOptions &LangOpts, SourceLocation TokLoc)
Returns the token range at TokLoc.
Definition: SourceCode.cpp:227
std::string ReturnType
Position Pos
Definition: SourceCode.cpp:772
void handleMacros(const MainFileMacros &MacroRefsToIndex)
bool shouldIndexFile(FileID FID)
Returns true if we are interested in references and declarations from FID.
std::string SnippetSuffix
Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc)
Turn a SourceLocation into a [line, column] pair.
Definition: SourceCode.cpp:200
bool CollectMainFileSymbols
Collect symbols local to main-files, such as static functions and symbols inside an anonymous namespa...
bool handleDeclOccurrence(const Decl *D, index::SymbolRoleSet Roles, ArrayRef< index::SymbolRelation > Relations, SourceLocation Loc, index::IndexDataConsumer::ASTNodeInfo ASTNode) override
int line
Line position in a document (zero-based).
Definition: Protocol.h:129
int character
Character offset on a line in a document (zero-based).
Definition: Protocol.h:134
static bool shouldCollectSymbol(const NamedDecl &ND, const ASTContext &ASTCtx, const Options &Opts, bool IsMainFileSymbol)
Returns true if ND should be collected.
static llvm::Expected< URI > create(llvm::StringRef AbsolutePath, llvm::StringRef Scheme)
Creates a URI for a file in the given scheme.
Definition: URI.cpp:197
The class presents a C++ symbol, e.g.
Definition: Symbol.h:36
Position Start
The symbol range, using half-open range [Start, End).
===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
llvm::StringRef Name
The unqualified name of the symbol, e.g. "bar" (for ns::bar).
Definition: Symbol.h:42
CharSourceRange Range
SourceRange for the file name.
llvm::Optional< std::string > getCanonicalPath(const FileEntry *F, const SourceManager &SourceMgr)
Get the canonical path of F.
Definition: SourceCode.cpp:622
void getSignature(const CodeCompletionString &CCS, std::string *Signature, std::string *Snippet, std::string *RequiredQualifiers, bool CompletingPattern)
Formats the signature for an item, as a display string and snippet.
void insert(const SymbolID &ID, const Ref &S)
Adds a ref to the slab. Deep copy: Strings will be owned by the slab.
Definition: Ref.cpp:35
bool isHeaderFile(llvm::StringRef FileName, llvm::Optional< LangOptions > LangOpts)
Infers whether this is a header from the FileName and LangOpts (if present).
bool isImplementationDetail(const Decl *D)
Returns true if the declaration is considered implementation detail based on heuristics.
Definition: AST.cpp:157
llvm::StringRef CompletionSnippetSuffix
What to insert when completing this symbol, after the symbol name.
Definition: Symbol.h:74
Indicates if the symbol is deprecated.
Definition: Symbol.h:121
static bool isIf(llvm::StringRef Line)
Position end
The range's end position.
Definition: Protocol.h:161
llvm::StringRef Type
Raw representation of the OpaqueType of the symbol, used for scoring purposes.
Definition: Symbol.h:85
SymbolOrigin Origin
Where this symbol came from. Usually an index provides a constant value.
Definition: Symbol.h:61
llvm::StringRef TemplateSpecializationArgs
Argument list in human-readable format, will be displayed to help disambiguate between different spec...
Definition: Symbol.h:69
llvm::StringRef ReturnType
Type when this symbol is used in an expression.
Definition: Symbol.h:80
RefKind Kind
Definition: Ref.h:55
static llvm::Optional< OpaqueType > fromCompletionResult(ASTContext &Ctx, const CodeCompletionResult &R)
Create a type from a code completion result.