clang-tools  10.0.0git
FileIndex.cpp
Go to the documentation of this file.
1 //===--- FileIndex.cpp - Indexes for files. ------------------------ C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "FileIndex.h"
10 #include "CollectMacros.h"
11 #include "Logger.h"
12 #include "ParsedAST.h"
13 #include "SymbolCollector.h"
15 #include "index/Index.h"
16 #include "index/MemIndex.h"
17 #include "index/Merge.h"
18 #include "index/SymbolOrigin.h"
19 #include "index/dex/Dex.h"
20 #include "clang/AST/ASTContext.h"
21 #include "clang/Index/IndexingAction.h"
22 #include "clang/Index/IndexingOptions.h"
23 #include "clang/Lex/MacroInfo.h"
24 #include "clang/Lex/Preprocessor.h"
25 #include "llvm/ADT/DenseMap.h"
26 #include "llvm/ADT/DenseSet.h"
27 #include "llvm/ADT/STLExtras.h"
28 #include "llvm/ADT/StringRef.h"
29 #include <memory>
30 
31 namespace clang {
32 namespace clangd {
33 
34 static SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr<Preprocessor> PP,
35  llvm::ArrayRef<Decl *> DeclsToIndex,
36  const MainFileMacros *MacroRefsToIndex,
37  const CanonicalIncludes &Includes,
38  bool IsIndexMainAST) {
40  CollectorOpts.CollectIncludePath = true;
41  CollectorOpts.Includes = &Includes;
42  CollectorOpts.CountReferences = false;
43  CollectorOpts.Origin = SymbolOrigin::Dynamic;
44 
45  index::IndexingOptions IndexOpts;
46  // We only need declarations, because we don't count references.
47  IndexOpts.SystemSymbolFilter =
48  index::IndexingOptions::SystemSymbolFilterKind::DeclarationsOnly;
49  IndexOpts.IndexFunctionLocals = false;
50  if (IsIndexMainAST) {
51  // We only collect refs when indexing main AST.
52  CollectorOpts.RefFilter = RefKind::All;
53  // Comments for main file can always be obtained from sema, do not store
54  // them in the index.
55  CollectorOpts.StoreAllDocumentation = false;
56  } else {
57  IndexOpts.IndexMacrosInPreprocessor = true;
58  CollectorOpts.CollectMacro = true;
59  CollectorOpts.StoreAllDocumentation = true;
60  }
61 
62  SymbolCollector Collector(std::move(CollectorOpts));
63  Collector.setPreprocessor(PP);
64  if (MacroRefsToIndex)
65  Collector.handleMacros(*MacroRefsToIndex);
66  index::indexTopLevelDecls(AST, *PP, DeclsToIndex, Collector, IndexOpts);
67 
68  const auto &SM = AST.getSourceManager();
69  const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
70  std::string FileName = MainFileEntry ? MainFileEntry->getName() : "";
71 
72  auto Syms = Collector.takeSymbols();
73  auto Refs = Collector.takeRefs();
74  auto Relations = Collector.takeRelations();
75 
76  vlog("index AST for {0} (main={1}): \n"
77  " symbol slab: {2} symbols, {3} bytes\n"
78  " ref slab: {4} symbols, {5} refs, {6} bytes\n"
79  " relations slab: {7} relations, {8} bytes",
80  FileName, IsIndexMainAST, Syms.size(), Syms.bytes(), Refs.size(),
81  Refs.numRefs(), Refs.bytes(), Relations.size(), Relations.bytes());
82  return std::make_tuple(std::move(Syms), std::move(Refs),
83  std::move(Relations));
84 }
85 
87  return indexSymbols(AST.getASTContext(), AST.getPreprocessorPtr(),
88  AST.getLocalTopLevelDecls(), &AST.getMacros(),
90  /*IsIndexMainAST=*/true);
91 }
92 
93 SlabTuple indexHeaderSymbols(ASTContext &AST, std::shared_ptr<Preprocessor> PP,
94  const CanonicalIncludes &Includes) {
95  std::vector<Decl *> DeclsToIndex(
96  AST.getTranslationUnitDecl()->decls().begin(),
97  AST.getTranslationUnitDecl()->decls().end());
98  return indexSymbols(AST, std::move(PP), DeclsToIndex,
99  /*MainFileMacros=*/nullptr, Includes,
100  /*IsIndexMainAST=*/false);
101 }
102 
103 void FileSymbols::update(PathRef Path, std::unique_ptr<SymbolSlab> Symbols,
104  std::unique_ptr<RefSlab> Refs,
105  std::unique_ptr<RelationSlab> Relations,
106  bool CountReferences) {
107  std::lock_guard<std::mutex> Lock(Mutex);
108  if (!Symbols)
109  FileToSymbols.erase(Path);
110  else
111  FileToSymbols[Path] = std::move(Symbols);
112  if (!Refs) {
113  FileToRefs.erase(Path);
114  } else {
115  RefSlabAndCountReferences Item;
116  Item.CountReferences = CountReferences;
117  Item.Slab = std::move(Refs);
118  FileToRefs[Path] = std::move(Item);
119  }
120  if (!Relations)
121  FileToRelations.erase(Path);
122  else
123  FileToRelations[Path] = std::move(Relations);
124 }
125 
126 std::unique_ptr<SymbolIndex>
128  std::vector<std::shared_ptr<SymbolSlab>> SymbolSlabs;
129  std::vector<std::shared_ptr<RefSlab>> RefSlabs;
130  std::vector<std::shared_ptr<RelationSlab>> RelationSlabs;
131  std::vector<RefSlab *> MainFileRefs;
132  {
133  std::lock_guard<std::mutex> Lock(Mutex);
134  for (const auto &FileAndSymbols : FileToSymbols)
135  SymbolSlabs.push_back(FileAndSymbols.second);
136  for (const auto &FileAndRefs : FileToRefs) {
137  RefSlabs.push_back(FileAndRefs.second.Slab);
138  if (FileAndRefs.second.CountReferences)
139  MainFileRefs.push_back(RefSlabs.back().get());
140  }
141  for (const auto &FileAndRelations : FileToRelations)
142  RelationSlabs.push_back(FileAndRelations.second);
143  }
144  std::vector<const Symbol *> AllSymbols;
145  std::vector<Symbol> SymsStorage;
146  switch (DuplicateHandle) {
148  llvm::DenseMap<SymbolID, Symbol> Merged;
149  for (const auto &Slab : SymbolSlabs) {
150  for (const auto &Sym : *Slab) {
151  assert(Sym.References == 0 &&
152  "Symbol with non-zero references sent to FileSymbols");
153  auto I = Merged.try_emplace(Sym.ID, Sym);
154  if (!I.second)
155  I.first->second = mergeSymbol(I.first->second, Sym);
156  }
157  }
158  for (const RefSlab *Refs : MainFileRefs)
159  for (const auto &Sym : *Refs) {
160  auto It = Merged.find(Sym.first);
161  // This might happen while background-index is still running.
162  if (It == Merged.end())
163  continue;
164  It->getSecond().References += Sym.second.size();
165  }
166  SymsStorage.reserve(Merged.size());
167  for (auto &Sym : Merged) {
168  SymsStorage.push_back(std::move(Sym.second));
169  AllSymbols.push_back(&SymsStorage.back());
170  }
171  break;
172  }
174  llvm::DenseSet<SymbolID> AddedSymbols;
175  for (const auto &Slab : SymbolSlabs)
176  for (const auto &Sym : *Slab) {
177  assert(Sym.References == 0 &&
178  "Symbol with non-zero references sent to FileSymbols");
179  if (AddedSymbols.insert(Sym.ID).second)
180  AllSymbols.push_back(&Sym);
181  }
182  break;
183  }
184  }
185 
186  std::vector<Ref> RefsStorage; // Contiguous ranges for each SymbolID.
187  llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> AllRefs;
188  {
189  llvm::DenseMap<SymbolID, llvm::SmallVector<Ref, 4>> MergedRefs;
190  size_t Count = 0;
191  for (const auto &RefSlab : RefSlabs)
192  for (const auto &Sym : *RefSlab) {
193  MergedRefs[Sym.first].append(Sym.second.begin(), Sym.second.end());
194  Count += Sym.second.size();
195  }
196  RefsStorage.reserve(Count);
197  AllRefs.reserve(MergedRefs.size());
198  for (auto &Sym : MergedRefs) {
199  auto &SymRefs = Sym.second;
200  // Sorting isn't required, but yields more stable results over rebuilds.
201  llvm::sort(SymRefs);
202  llvm::copy(SymRefs, back_inserter(RefsStorage));
203  AllRefs.try_emplace(
204  Sym.first,
205  llvm::ArrayRef<Ref>(&RefsStorage[RefsStorage.size() - SymRefs.size()],
206  SymRefs.size()));
207  }
208  }
209 
210  std::vector<Relation> AllRelations;
211  for (const auto &RelationSlab : RelationSlabs) {
212  for (const auto &R : *RelationSlab)
213  AllRelations.push_back(R);
214  }
215 
216  size_t StorageSize =
217  RefsStorage.size() * sizeof(Ref) + SymsStorage.size() * sizeof(Symbol);
218  for (const auto &Slab : SymbolSlabs)
219  StorageSize += Slab->bytes();
220  for (const auto &RefSlab : RefSlabs)
221  StorageSize += RefSlab->bytes();
222  for (const auto &RelationSlab : RelationSlabs)
223  StorageSize += RelationSlab->bytes();
224 
225  // Index must keep the slabs and contiguous ranges alive.
226  switch (Type) {
227  case IndexType::Light:
228  return std::make_unique<MemIndex>(
229  llvm::make_pointee_range(AllSymbols), std::move(AllRefs),
230  std::move(AllRelations),
231  std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),
232  std::move(RefsStorage), std::move(SymsStorage)),
233  StorageSize);
234  case IndexType::Heavy:
235  return std::make_unique<dex::Dex>(
236  llvm::make_pointee_range(AllSymbols), std::move(AllRefs),
237  std::move(AllRelations),
238  std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),
239  std::move(RefsStorage), std::move(SymsStorage)),
240  StorageSize);
241  }
242  llvm_unreachable("Unknown clangd::IndexType");
243 }
244 
246  : MergedIndex(&MainFileIndex, &PreambleIndex), UseDex(UseDex),
247  PreambleIndex(std::make_unique<MemIndex>()),
248  MainFileIndex(std::make_unique<MemIndex>()) {}
249 
251  std::shared_ptr<Preprocessor> PP,
252  const CanonicalIncludes &Includes) {
253  auto Slabs = indexHeaderSymbols(AST, std::move(PP), Includes);
254  PreambleSymbols.update(
255  Path, std::make_unique<SymbolSlab>(std::move(std::get<0>(Slabs))),
256  std::make_unique<RefSlab>(),
257  std::make_unique<RelationSlab>(std::move(std::get<2>(Slabs))),
258  /*CountReferences=*/false);
259  PreambleIndex.reset(
260  PreambleSymbols.buildIndex(UseDex ? IndexType::Heavy : IndexType::Light,
262 }
263 
265  auto Contents = indexMainDecls(AST);
266  MainFileSymbols.update(
267  Path, std::make_unique<SymbolSlab>(std::move(std::get<0>(Contents))),
268  std::make_unique<RefSlab>(std::move(std::get<1>(Contents))),
269  std::make_unique<RelationSlab>(std::move(std::get<2>(Contents))),
270  /*CountReferences=*/true);
271  MainFileIndex.reset(
273 }
274 
275 } // namespace clangd
276 } // namespace clang
std::tuple< SymbolSlab, RefSlab, RelationSlab > SlabTuple
Definition: FileIndex.h:136
llvm::StringRef Contents
SymbolCollector::Options CollectorOpts
An efficient structure of storing large set of symbol references in memory.
Definition: Ref.h:69
This defines Dex - a symbol index implementation based on query iterators over symbol tokens...
IndexType
Select between in-memory index implementations, which have tradeoffs.
Definition: FileIndex.h:33
Represents a symbol occurrence in the source file.
Definition: Ref.h:52
SlabTuple indexMainDecls(ParsedAST &AST)
Retrieves symbols and refs of local top level decls in AST (i.e.
Definition: FileIndex.cpp:86
Collect declarations (symbols) from an AST.
void updatePreamble(PathRef Path, ASTContext &AST, std::shared_ptr< Preprocessor > PP, const CanonicalIncludes &Includes)
Update preamble symbols of file Path with all declarations in AST and macros in PP.
Definition: FileIndex.cpp:250
llvm::StringRef PathRef
A typedef to represent a ref to file path.
Definition: Path.h:23
size_t numRefs() const
Definition: Ref.h:84
ArrayRef< Decl * > getLocalTopLevelDecls()
This function returns top-level decls present in the main file of the AST.
Definition: ParsedAST.cpp:440
void vlog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:67
void updateMain(PathRef Path, ParsedAST &AST)
Update symbols and references from main file Path with indexMainDecls.
Definition: FileIndex.cpp:264
ASTContext & getASTContext()
Note that the returned ast will not contain decls from the preamble that were not deserialized during...
Definition: ParsedAST.cpp:424
DuplicateHandling
How to handle duplicated symbols across multiple files.
Definition: FileIndex.h:41
Maps a definition location onto an #include file, based on a set of filename rules.
const CanonicalIncludes & getCanonicalIncludes() const
Definition: ParsedAST.cpp:484
bool StoreAllDocumentation
If set to true, SymbolCollector will collect doc for all symbols.
MemIndex is a naive in-memory index suitable for a small set of symbols.
Definition: MemIndex.h:19
std::shared_ptr< Preprocessor > getPreprocessorPtr()
Definition: ParsedAST.cpp:432
std::string Path
A typedef to represent a file path.
Definition: Path.h:20
size_t size() const
Gets the number of symbols.
Definition: Ref.h:83
std::unique_ptr< SymbolIndex > buildIndex(IndexType, DuplicateHandling DuplicateHandle=DuplicateHandling::PickOne)
The index keeps the symbols alive.
Definition: FileIndex.cpp:127
Symbol mergeSymbol(const Symbol &L, const Symbol &R)
Definition: Merge.cpp:172
std::shared_ptr< SymbolCollector > Collector
PathRef FileName
SymbolSlab Symbols
void handleMacros(const MainFileMacros &MacroRefsToIndex)
Stores and provides access to parsed AST.
Definition: ParsedAST.h:46
FileIndex(bool UseDex=true)
Definition: FileIndex.cpp:245
static SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr< Preprocessor > PP, llvm::ArrayRef< Decl *> DeclsToIndex, const MainFileMacros *MacroRefsToIndex, const CanonicalIncludes &Includes, bool IsIndexMainAST)
Definition: FileIndex.cpp:34
size_t bytes() const
Definition: Relation.h:60
const MainFileMacros & getMacros() const
Gets all macro references (definition, expansions) present in the main file, including those in the p...
Definition: ParsedAST.cpp:444
The class presents a C++ symbol, e.g.
Definition: Symbol.h:36
===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
void setPreprocessor(std::shared_ptr< Preprocessor > PP) override
size_t bytes() const
Definition: Ref.h:87
RefSlab Refs
void update(PathRef Path, std::unique_ptr< SymbolSlab > Slab, std::unique_ptr< RefSlab > Refs, std::unique_ptr< RelationSlab > Relations, bool CountReferences)
Updates all symbols and refs in a file.
Definition: FileIndex.cpp:103
NodeType Type
const CanonicalIncludes * Includes
If set, this is used to map symbol #include path to a potentially different #include path...
void reset(std::unique_ptr< SymbolIndex >)
Definition: Index.cpp:20
SlabTuple indexHeaderSymbols(ASTContext &AST, std::shared_ptr< Preprocessor > PP, const CanonicalIncludes &Includes)
Index declarations from AST and macros from PP that are declared in included headers.
Definition: FileIndex.cpp:93
RefKind RefFilter
The symbol ref kinds that will be collected.