clang-tools  9.0.0
FileIndex.cpp
Go to the documentation of this file.
1 //===--- FileIndex.cpp - Indexes for files. ------------------------ C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "FileIndex.h"
10 #include "ClangdUnit.h"
11 #include "Logger.h"
12 #include "SymbolCollector.h"
14 #include "index/Index.h"
15 #include "index/MemIndex.h"
16 #include "index/Merge.h"
17 #include "index/SymbolOrigin.h"
18 #include "index/dex/Dex.h"
19 #include "clang/Index/IndexingAction.h"
20 #include "clang/Lex/MacroInfo.h"
21 #include "clang/Lex/Preprocessor.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/DenseSet.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/StringRef.h"
26 #include <memory>
27 
28 namespace clang {
29 namespace clangd {
30 
31 static SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr<Preprocessor> PP,
32  llvm::ArrayRef<Decl *> DeclsToIndex,
33  const CanonicalIncludes &Includes,
34  bool IsIndexMainAST) {
36  CollectorOpts.CollectIncludePath = true;
37  CollectorOpts.Includes = &Includes;
38  CollectorOpts.CountReferences = false;
39  CollectorOpts.Origin = SymbolOrigin::Dynamic;
40 
41  index::IndexingOptions IndexOpts;
42  // We only need declarations, because we don't count references.
43  IndexOpts.SystemSymbolFilter =
44  index::IndexingOptions::SystemSymbolFilterKind::DeclarationsOnly;
45  IndexOpts.IndexFunctionLocals = false;
46  if (IsIndexMainAST) {
47  // We only collect refs when indexing main AST.
48  CollectorOpts.RefFilter = RefKind::All;
49  // Comments for main file can always be obtained from sema, do not store
50  // them in the index.
51  CollectorOpts.StoreAllDocumentation = false;
52  } else {
53  IndexOpts.IndexMacrosInPreprocessor = true;
54  CollectorOpts.CollectMacro = true;
55  CollectorOpts.StoreAllDocumentation = true;
56  }
57 
58  SymbolCollector Collector(std::move(CollectorOpts));
59  Collector.setPreprocessor(PP);
60  index::indexTopLevelDecls(AST, *PP, DeclsToIndex, Collector, IndexOpts);
61 
62  const auto &SM = AST.getSourceManager();
63  const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
64  std::string FileName = MainFileEntry ? MainFileEntry->getName() : "";
65 
66  auto Syms = Collector.takeSymbols();
67  auto Refs = Collector.takeRefs();
68  auto Relations = Collector.takeRelations();
69  vlog("index AST for {0} (main={1}): \n"
70  " symbol slab: {2} symbols, {3} bytes\n"
71  " ref slab: {4} symbols, {5} refs, {6} bytes\n"
72  " relations slab: {7} relations, {8} bytes",
73  FileName, IsIndexMainAST, Syms.size(), Syms.bytes(), Refs.size(),
75  return std::make_tuple(std::move(Syms), std::move(Refs),
76  std::move(Relations));
77 }
78 
80  return indexSymbols(AST.getASTContext(), AST.getPreprocessorPtr(),
82  /*IsIndexMainAST=*/true);
83 }
84 
85 SlabTuple indexHeaderSymbols(ASTContext &AST, std::shared_ptr<Preprocessor> PP,
86  const CanonicalIncludes &Includes) {
87  std::vector<Decl *> DeclsToIndex(
88  AST.getTranslationUnitDecl()->decls().begin(),
89  AST.getTranslationUnitDecl()->decls().end());
90  return indexSymbols(AST, std::move(PP), DeclsToIndex, Includes,
91  /*IsIndexMainAST=*/false);
92 }
93 
94 void FileSymbols::update(PathRef Path, std::unique_ptr<SymbolSlab> Symbols,
95  std::unique_ptr<RefSlab> Refs,
96  std::unique_ptr<RelationSlab> Relations,
97  bool CountReferences) {
98  std::lock_guard<std::mutex> Lock(Mutex);
99  if (!Symbols)
100  FileToSymbols.erase(Path);
101  else
102  FileToSymbols[Path] = std::move(Symbols);
103  if (!Refs) {
104  FileToRefs.erase(Path);
105  } else {
106  RefSlabAndCountReferences Item;
107  Item.CountReferences = CountReferences;
108  Item.Slab = std::move(Refs);
109  FileToRefs[Path] = std::move(Item);
110  }
111  if (!Relations)
112  FileToRelations.erase(Path);
113  else
114  FileToRelations[Path] = std::move(Relations);
115 }
116 
117 std::unique_ptr<SymbolIndex>
119  std::vector<std::shared_ptr<SymbolSlab>> SymbolSlabs;
120  std::vector<std::shared_ptr<RefSlab>> RefSlabs;
121  std::vector<std::shared_ptr<RelationSlab>> RelationSlabs;
122  std::vector<RefSlab *> MainFileRefs;
123  {
124  std::lock_guard<std::mutex> Lock(Mutex);
125  for (const auto &FileAndSymbols : FileToSymbols)
126  SymbolSlabs.push_back(FileAndSymbols.second);
127  for (const auto &FileAndRefs : FileToRefs) {
128  RefSlabs.push_back(FileAndRefs.second.Slab);
129  if (FileAndRefs.second.CountReferences)
130  MainFileRefs.push_back(RefSlabs.back().get());
131  }
132  for (const auto &FileAndRelations : FileToRelations)
133  RelationSlabs.push_back(FileAndRelations.second);
134  }
135  std::vector<const Symbol *> AllSymbols;
136  std::vector<Symbol> SymsStorage;
137  switch (DuplicateHandle) {
139  llvm::DenseMap<SymbolID, Symbol> Merged;
140  for (const auto &Slab : SymbolSlabs) {
141  for (const auto &Sym : *Slab) {
142  assert(Sym.References == 0 &&
143  "Symbol with non-zero references sent to FileSymbols");
144  auto I = Merged.try_emplace(Sym.ID, Sym);
145  if (!I.second)
146  I.first->second = mergeSymbol(I.first->second, Sym);
147  }
148  }
149  for (const RefSlab *Refs : MainFileRefs)
150  for (const auto &Sym : *Refs) {
151  auto It = Merged.find(Sym.first);
152  // This might happen while background-index is still running.
153  if (It == Merged.end())
154  continue;
155  It->getSecond().References += Sym.second.size();
156  }
157  SymsStorage.reserve(Merged.size());
158  for (auto &Sym : Merged) {
159  SymsStorage.push_back(std::move(Sym.second));
160  AllSymbols.push_back(&SymsStorage.back());
161  }
162  break;
163  }
165  llvm::DenseSet<SymbolID> AddedSymbols;
166  for (const auto &Slab : SymbolSlabs)
167  for (const auto &Sym : *Slab) {
168  assert(Sym.References == 0 &&
169  "Symbol with non-zero references sent to FileSymbols");
170  if (AddedSymbols.insert(Sym.ID).second)
171  AllSymbols.push_back(&Sym);
172  }
173  break;
174  }
175  }
176 
177  std::vector<Ref> RefsStorage; // Contiguous ranges for each SymbolID.
178  llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> AllRefs;
179  {
180  llvm::DenseMap<SymbolID, llvm::SmallVector<Ref, 4>> MergedRefs;
181  size_t Count = 0;
182  for (const auto &RefSlab : RefSlabs)
183  for (const auto &Sym : *RefSlab) {
184  MergedRefs[Sym.first].append(Sym.second.begin(), Sym.second.end());
185  Count += Sym.second.size();
186  }
187  RefsStorage.reserve(Count);
188  AllRefs.reserve(MergedRefs.size());
189  for (auto &Sym : MergedRefs) {
190  auto &SymRefs = Sym.second;
191  // Sorting isn't required, but yields more stable results over rebuilds.
192  llvm::sort(SymRefs);
193  llvm::copy(SymRefs, back_inserter(RefsStorage));
194  AllRefs.try_emplace(
195  Sym.first,
196  llvm::ArrayRef<Ref>(&RefsStorage[RefsStorage.size() - SymRefs.size()],
197  SymRefs.size()));
198  }
199  }
200 
201  std::vector<Relation> AllRelations;
202  for (const auto &RelationSlab : RelationSlabs) {
203  for (const auto &R : *RelationSlab)
204  AllRelations.push_back(R);
205  }
206 
207  size_t StorageSize =
208  RefsStorage.size() * sizeof(Ref) + SymsStorage.size() * sizeof(Symbol);
209  for (const auto &Slab : SymbolSlabs)
210  StorageSize += Slab->bytes();
211  for (const auto &RefSlab : RefSlabs)
212  StorageSize += RefSlab->bytes();
213  for (const auto &RelationSlab : RelationSlabs)
214  StorageSize += RelationSlab->bytes();
215 
216  // Index must keep the slabs and contiguous ranges alive.
217  switch (Type) {
218  case IndexType::Light:
219  return llvm::make_unique<MemIndex>(
220  llvm::make_pointee_range(AllSymbols), std::move(AllRefs),
221  std::move(AllRelations),
222  std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),
223  std::move(RefsStorage), std::move(SymsStorage)),
224  StorageSize);
225  case IndexType::Heavy:
226  return llvm::make_unique<dex::Dex>(
227  llvm::make_pointee_range(AllSymbols), std::move(AllRefs),
228  std::move(AllRelations),
229  std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),
230  std::move(RefsStorage), std::move(SymsStorage)),
231  StorageSize);
232  }
233  llvm_unreachable("Unknown clangd::IndexType");
234 }
235 
237  : MergedIndex(&MainFileIndex, &PreambleIndex), UseDex(UseDex),
238  PreambleIndex(llvm::make_unique<MemIndex>()),
239  MainFileIndex(llvm::make_unique<MemIndex>()) {}
240 
242  std::shared_ptr<Preprocessor> PP,
243  const CanonicalIncludes &Includes) {
244  auto Slabs = indexHeaderSymbols(AST, std::move(PP), Includes);
245  PreambleSymbols.update(
246  Path, llvm::make_unique<SymbolSlab>(std::move(std::get<0>(Slabs))),
247  llvm::make_unique<RefSlab>(),
248  llvm::make_unique<RelationSlab>(std::move(std::get<2>(Slabs))),
249  /*CountReferences=*/false);
250  PreambleIndex.reset(
251  PreambleSymbols.buildIndex(UseDex ? IndexType::Heavy : IndexType::Light,
253 }
254 
256  auto Contents = indexMainDecls(AST);
257  MainFileSymbols.update(
258  Path, llvm::make_unique<SymbolSlab>(std::move(std::get<0>(Contents))),
259  llvm::make_unique<RefSlab>(std::move(std::get<1>(Contents))),
260  llvm::make_unique<RelationSlab>(std::move(std::get<2>(Contents))),
261  /*CountReferences=*/true);
262  MainFileIndex.reset(
264 }
265 
266 } // namespace clangd
267 } // namespace clang
std::tuple< SymbolSlab, RefSlab, RelationSlab > SlabTuple
Definition: FileIndex.h:134
llvm::StringRef Contents
Some operations such as code completion produce a set of candidates.
SymbolCollector::Options CollectorOpts
An efficient structure of storing large set of symbol references in memory.
Definition: Ref.h:69
This defines Dex - a symbol index implementation based on query iterators over symbol tokens...
IndexType
Select between in-memory index implementations, which have tradeoffs.
Definition: FileIndex.h:31
Represents a symbol occurrence in the source file.
Definition: Ref.h:52
SlabTuple indexMainDecls(ParsedAST &AST)
Retrieves symbols and refs of local top level decls in AST (i.e.
Definition: FileIndex.cpp:79
Collect declarations (symbols) from an AST.
void updatePreamble(PathRef Path, ASTContext &AST, std::shared_ptr< Preprocessor > PP, const CanonicalIncludes &Includes)
Update preamble symbols of file Path with all declarations in AST and macros in PP.
Definition: FileIndex.cpp:241
llvm::StringRef PathRef
A typedef to represent a ref to file path.
Definition: Path.h:23
size_t numRefs() const
Definition: Ref.h:84
ArrayRef< Decl * > getLocalTopLevelDecls()
This function returns top-level decls present in the main file of the AST.
Definition: ClangdUnit.cpp:493
void vlog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:67
void updateMain(PathRef Path, ParsedAST &AST)
Update symbols and references from main file Path with indexMainDecls.
Definition: FileIndex.cpp:255
ASTContext & getASTContext()
Note that the returned ast will not contain decls from the preamble that were not deserialized during...
Definition: ClangdUnit.cpp:477
DuplicateHandling
How to handle duplicated symbols across multiple files.
Definition: FileIndex.h:39
Maps a definition location onto an #include file, based on a set of filename rules.
const CanonicalIncludes & getCanonicalIncludes() const
Definition: ClangdUnit.cpp:535
bool StoreAllDocumentation
If set to true, SymbolCollector will collect doc for all symbols.
MemIndex is a naive in-memory index suitable for a small set of symbols.
Definition: MemIndex.h:19
std::shared_ptr< Preprocessor > getPreprocessorPtr()
Definition: ClangdUnit.cpp:485
std::string Path
A typedef to represent a file path.
Definition: Path.h:20
size_t size() const
Gets the number of symbols.
Definition: Ref.h:83
std::unique_ptr< SymbolIndex > buildIndex(IndexType, DuplicateHandling DuplicateHandle=DuplicateHandling::PickOne)
The index keeps the symbols alive.
Definition: FileIndex.cpp:118
Symbol mergeSymbol(const Symbol &L, const Symbol &R)
Definition: Merge.cpp:165
RelationSlab Relations
std::shared_ptr< SymbolCollector > Collector
PathRef FileName
SymbolSlab Symbols
Stores and provides access to parsed AST.
Definition: ClangdUnit.h:73
FileIndex(bool UseDex=true)
Definition: FileIndex.cpp:236
size_t bytes() const
Definition: Relation.h:56
The class presents a C++ symbol, e.g.
Definition: Symbol.h:36
//===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
void setPreprocessor(std::shared_ptr< Preprocessor > PP) override
size_t size() const
Definition: Relation.h:53
size_t bytes() const
Definition: Ref.h:87
RefSlab Refs
void update(PathRef Path, std::unique_ptr< SymbolSlab > Slab, std::unique_ptr< RefSlab > Refs, std::unique_ptr< RelationSlab > Relations, bool CountReferences)
Updates all symbols and refs in a file.
Definition: FileIndex.cpp:94
static SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr< Preprocessor > PP, llvm::ArrayRef< Decl *> DeclsToIndex, const CanonicalIncludes &Includes, bool IsIndexMainAST)
Definition: FileIndex.cpp:31
NodeType Type
const CanonicalIncludes * Includes
If set, this is used to map symbol #include path to a potentially different #include path...
void reset(std::unique_ptr< SymbolIndex >)
Definition: Index.cpp:20
SlabTuple indexHeaderSymbols(ASTContext &AST, std::shared_ptr< Preprocessor > PP, const CanonicalIncludes &Includes)
Index declarations from AST and macros from PP that are declared in included headers.
Definition: FileIndex.cpp:85
RefKind RefFilter
The symbol ref kinds that will be collected.