clang-tools  10.0.0git
Dex.h
Go to the documentation of this file.
1 //===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This defines Dex - a symbol index implementation based on query iterators
11 /// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc.
12 /// While consuming more memory and having longer build stage due to
13 /// preprocessing, Dex will have substantially lower latency. It will also allow
14 /// efficient symbol searching which is crucial for operations like code
15 /// completion, and can be very important for a number of different code
16 /// transformations which will be eventually supported by Clangd.
17 ///
18 //===----------------------------------------------------------------------===//
19 
20 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
21 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
22 
23 #include "Iterator.h"
24 #include "PostingList.h"
25 #include "Token.h"
26 #include "Trigram.h"
27 #include "index/Index.h"
28 #include "index/MemIndex.h"
29 #include "index/Relation.h"
30 #include "index/SymbolCollector.h"
31 
32 namespace clang {
33 namespace clangd {
34 namespace dex {
35 
36 /// In-memory Dex trigram-based index implementation.
37 // FIXME(kbobyrev): Introduce serialization and deserialization of the symbol
38 // index so that it can be loaded from the disk. Since static index is not
39 // changed frequently, it's safe to assume that it has to be built only once
40 // (when the clangd process starts). Therefore, it can be easier to store built
41 // index on disk and then load it if available.
42 class Dex : public SymbolIndex {
43 public:
44  // All data must outlive this index.
45  template <typename SymbolRange, typename RefsRange, typename RelationsRange>
46  Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations)
47  : Corpus(0) {
48  for (auto &&Sym : Symbols)
49  this->Symbols.push_back(&Sym);
50  for (auto &&Ref : Refs)
51  this->Refs.try_emplace(Ref.first, Ref.second);
52  for (auto &&Rel : Relations)
53  this->Relations[std::make_pair(Rel.Subject,
54  static_cast<uint8_t>(Rel.Predicate))]
55  .push_back(Rel.Object);
56  buildIndex();
57  }
58  // Symbols and Refs are owned by BackingData, Index takes ownership.
59  template <typename SymbolRange, typename RefsRange, typename RelationsRange,
60  typename Payload>
61  Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations,
62  Payload &&BackingData, size_t BackingDataSize)
63  : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs),
64  std::forward<RelationsRange>(Relations)) {
65  KeepAlive = std::shared_ptr<void>(
66  std::make_shared<Payload>(std::move(BackingData)), nullptr);
67  this->BackingDataSize = BackingDataSize;
68  }
69 
70  /// Builds an index from slabs. The index takes ownership of the slab.
71  static std::unique_ptr<SymbolIndex> build(SymbolSlab, RefSlab, RelationSlab);
72 
73  bool
74  fuzzyFind(const FuzzyFindRequest &Req,
75  llvm::function_ref<void(const Symbol &)> Callback) const override;
76 
77  void lookup(const LookupRequest &Req,
78  llvm::function_ref<void(const Symbol &)> Callback) const override;
79 
80  bool refs(const RefsRequest &Req,
81  llvm::function_ref<void(const Ref &)> Callback) const override;
82 
83  void relations(const RelationsRequest &Req,
84  llvm::function_ref<void(const SymbolID &, const Symbol &)>
85  Callback) const override;
86 
87  size_t estimateMemoryUsage() const override;
88 
89 private:
90  void buildIndex();
91  std::unique_ptr<Iterator> iterator(const Token &Tok) const;
92  std::unique_ptr<Iterator>
93  createFileProximityIterator(llvm::ArrayRef<std::string> ProximityPaths) const;
94  std::unique_ptr<Iterator>
95  createTypeBoostingIterator(llvm::ArrayRef<std::string> Types) const;
96 
97  /// Stores symbols sorted in the descending order of symbol quality..
98  std::vector<const Symbol *> Symbols;
99  /// SymbolQuality[I] is the quality of Symbols[I].
100  std::vector<float> SymbolQuality;
101  llvm::DenseMap<SymbolID, const Symbol *> LookupTable;
102  /// Inverted index is a mapping from the search token to the posting list,
103  /// which contains all items which can be characterized by such search token.
104  /// For example, if the search token is scope "std::", the corresponding
105  /// posting list would contain all indices of symbols defined in namespace
106  /// std. Inverted index is used to retrieve posting lists which are processed
107  /// during the fuzzyFind process.
108  llvm::DenseMap<Token, PostingList> InvertedIndex;
110  llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> Refs;
111  static_assert(sizeof(RelationKind) == sizeof(uint8_t),
112  "RelationKind should be of same size as a uint8_t");
113  llvm::DenseMap<std::pair<SymbolID, uint8_t>, std::vector<SymbolID>> Relations;
114  std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
115  // Size of memory retained by KeepAlive.
116  size_t BackingDataSize = 0;
117 };
118 
119 /// Returns Search Token for a number of parent directories of given Path.
120 /// Should be used within the index build process.
121 ///
122 /// This function is exposed for testing only.
123 std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath);
124 
125 } // namespace dex
126 } // namespace clangd
127 } // namespace clang
128 
129 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
An immutable symbol container that stores a set of symbols.
Definition: Symbol.h:177
An efficient structure of storing large set of symbol references in memory.
Definition: Ref.h:69
Interface for symbol indexes that can be used for searching or matching symbols among a set of symbol...
Definition: Index.h:85
Represents a symbol occurrence in the source file.
Definition: Ref.h:52
llvm::unique_function< void(llvm::Expected< T >)> Callback
A Callback<T> is a void function that accepts Expected<T>.
Definition: Function.h:28
bool fuzzyFind(const FuzzyFindRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const override
Constructs iterators over tokens extracted from the query and exhausts it while applying Callback to ...
Definition: Dex.cpp:163
Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations)
Definition: Dex.h:46
std::vector< std::string > generateProximityURIs(llvm::StringRef URIPath)
Returns Search Token for a number of parent directories of given Path.
Definition: Dex.cpp:303
static std::unique_ptr< SymbolIndex > build(SymbolSlab, RefSlab, RelationSlab)
Builds an index from slabs. The index takes ownership of the slab.
Definition: Dex.cpp:26
void relations(const RelationsRequest &Req, llvm::function_ref< void(const SymbolID &, const Symbol &)> Callback) const override
Definition: Dex.cpp:269
void lookup(const LookupRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const override
Looks up symbols with any of the given symbol IDs and applies Callback on each matched symbol...
Definition: Dex.cpp:242
This defines posting list interface: a storage for identifiers of symbols which can be characterized ...
The class presents a C++ symbol, e.g.
Definition: Symbol.h:36
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Trigrams are attributes of the symbol unqualified name used to effectively extract symbols which can ...
std::string Payload
Definition: SourceCode.cpp:771
Symbol index queries consist of specific requirements for the requested symbol, such as high fuzzy ma...
Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, Payload &&BackingData, size_t BackingDataSize)
Definition: Dex.h:61
In-memory Dex trigram-based index implementation.
Definition: Dex.h:42
A Token represents an attribute of a symbol, such as a particular trigram present in the name (used f...
Definition: Token.h:40
size_t estimateMemoryUsage() const override
Returns estimated size of index (in bytes).
Definition: Dex.cpp:291
bool refs(const RefsRequest &Req, llvm::function_ref< void(const Ref &)> Callback) const override
Finds all occurrences (e.g.
Definition: Dex.cpp:252
Token objects represent a characteristic of a symbol, which can be used to perform efficient search...