17 #include "llvm/ADT/StringSet.h"
18 #include "llvm/Support/ScopedPrinter.h"
// Capture the combined byte count of Symbols and Refs before both are moved
// from on the next statement.
28 auto Size = Symbols.bytes() + Refs.bytes();
// Bundle the two moved-from arguments into a single pair object.
31 auto Data = std::make_pair(std::move(Symbols), std::move(Refs));
// Data.first/Data.second are passed together with std::move(Data) itself.
// NOTE(review): presumably the Dex constructor reads the two ranges and then
// takes ownership of the pair as backing storage -- confirm against the
// constructor, which is not visible here. The argument list continues on a
// line not shown in this excerpt.
32 return std::make_unique<Dex>(Data.first, Data.second, Rels, std::move(Data),
// Token under which build() stores the RestrictedCCDocs posting list and which
// the query code passes to iterator() as a criterion. The initializer is on a
// line not shown in this excerpt.
39 const Token RestrictedForCodeCompletion =
// Accumulators filled while documents are added: each map goes from a key
// (trigram, type, scope or proximity URI) to the DocIDs pushed for that key.
48 llvm::DenseMap<Trigram, std::vector<DocID>> TrigramDocs;
// Flat list of DocIDs; emitted under RestrictedForCodeCompletion by build().
49 std::vector<DocID> RestrictedCCDocs;
50 llvm::StringMap<std::vector<DocID>> TypeDocs;
51 llvm::StringMap<std::vector<DocID>> ScopeDocs;
52 llvm::StringMap<std::vector<DocID>> ProximityDocs;
// Trigrams iterated when filling TrigramDocs; where it is populated is not
// visible in this excerpt (presumably a reusable scratch buffer -- confirm).
53 std::vector<Trigram> TrigramScratch;
// Append D to the posting list of every trigram currently in TrigramScratch.
62 for (
Trigram T : TrigramScratch)
63 TrigramDocs[T].push_back(D);
// Record D under the symbol's Scope.
64 ScopeDocs[Sym.
Scope].push_back(D);
// Record D under each proximity URI; the range expression being iterated is
// on a line not shown in this excerpt.
66 for (
const auto &ProximityURI :
68 ProximityDocs[ProximityURI].push_back(D);
// NOTE(review): the condition guarding this push (if any) is not visible here.
70 RestrictedCCDocs.push_back(D);
// Symbols with an empty Type get no entry in TypeDocs.
71 if (!Sym.
Type.empty())
72 TypeDocs[Sym.
Type].push_back(D);
// Produces the Token -> PostingList map from the accumulated DocID lists.
76 llvm::DenseMap<Token, PostingList> build() {
// The constructor argument is a sum of accumulator sizes (only part of the
// expression is visible in this excerpt); presumably an initial size hint for
// the map -- TODO confirm.
77 llvm::DenseMap<Token, PostingList> Result(
79 RestrictedCCDocs.size() +
82 ProximityDocs.size());
// One loop per accumulator map; the loop bodies are on lines not shown in this
// excerpt.
83 for (
const auto &
E : TrigramDocs)
85 for (
const auto &
E : TypeDocs)
87 for (
const auto &
E : ScopeDocs)
89 for (
const auto &
E : ProximityDocs)
// The RestrictedForCodeCompletion entry is only created when at least one
// document was recorded for it, so the token is absent from the result
// otherwise.
91 if (!RestrictedCCDocs.empty())
92 Result.try_emplace(RestrictedForCodeCompletion, RestrictedCCDocs);
// Rebuilds Corpus, LookupTable, SymbolQuality, the ordering of Symbols and
// InvertedIndex from the current contents of Symbols.
99 void Dex::buildIndex() {
100 this->Corpus = dex::Corpus(Symbols.size());
101 std::vector<std::pair<float, const Symbol *>> ScoredSymbols(Symbols.size());
// First pass: register every symbol by ID and pair it with its quality() score.
103 for (
size_t I = 0; I < Symbols.size(); ++I) {
104 const Symbol *Sym = Symbols[I];
105 LookupTable[Sym->ID] = Sym;
106 ScoredSymbols[I] = {
quality(*Sym), Sym};
// std::greater on the pair sorts in descending order: by score first, with the
// pointer value breaking ties between equal scores.
111 llvm::sort(ScoredSymbols, std::greater<std::pair<float, const Symbol *>>());
114 SymbolQuality.resize(Symbols.size());
// Second pass: write the sorted order back so that index I in Symbols and in
// SymbolQuality refer to the same symbol.
116 for (
size_t I = 0; I < ScoredSymbols.size(); ++I) {
117 SymbolQuality[I] = ScoredSymbols[I].first;
118 Symbols[I] = ScoredSymbols[I].second;
// A symbol's position in the sorted Symbols array is used as its DocID.
// (The declaration of Builder is on a line not shown in this excerpt.)
123 for (
DocID SymbolRank = 0; SymbolRank < Symbols.size(); ++SymbolRank)
124 Builder.add(*Symbols[SymbolRank], SymbolRank);
125 InvertedIndex =
Builder.build();
// Returns an iterator over the posting list stored for Tok, or Corpus.none()
// when InvertedIndex has no entry for Tok.
128 std::unique_ptr<Iterator> Dex::iterator(
const Token &Tok)
const {
129 auto It = InvertedIndex.find(Tok);
// The address of the key stored inside InvertedIndex is handed to the posting
// list iterator. NOTE(review): what the iterator does with that pointer is
// not visible here; it points into the map, not at the caller's Tok.
130 return It == InvertedIndex.end() ? Corpus.
none()
131 : It->second.iterator(&It->first);
// Builds a union of boosted iterators derived from ProximityPaths plus
// Corpus.all(). Several interior lines of this function are not shown in this
// excerpt.
135 std::unique_ptr<Iterator> Dex::createFileProximityIterator(
136 llvm::ArrayRef<std::string> ProximityPaths)
const {
137 std::vector<std::unique_ptr<Iterator>> BoostingIterators;
// A string set, so a URI reached from several paths is stored only once.
139 llvm::StringSet<> ParentURIs;
140 llvm::StringMap<SourceParams> Sources;
141 for (
const auto &
Path : ProximityPaths) {
// Each path is registered as a source with default-constructed SourceParams.
142 Sources[
Path] = SourceParams();
// PathProximityURIs is computed on a line not shown in this excerpt.
145 for (
const auto &ProximityURI : PathProximityURIs)
146 ParentURIs.insert(ProximityURI);
150 SymbolRelevanceSignals PathProximitySignals;
// The signals object holds a pointer to this local calculator; both are locals
// of this function and evaluate() is called below within the same scope.
153 URIDistance DistanceCalculator(Sources);
154 PathProximitySignals.FileProximityMatch = &DistanceCalculator;
// For every collected URI: set it as SymbolURI, evaluate the signals and use
// the result as the boost factor. The iterator It that gets boosted is
// created on a line not shown in this excerpt.
158 for (
const auto &ParentURI : ParentURIs.keys()) {
162 PathProximitySignals.SymbolURI = ParentURI;
163 BoostingIterators.push_back(
164 Corpus.
boost(std::move(It), PathProximitySignals.evaluate()));
// Corpus.all() is always part of the union; presumably so documents without
// any proximity match are still returned -- TODO confirm.
167 BoostingIterators.push_back(Corpus.
all());
168 return Corpus.
unionOf(std::move(BoostingIterators));
// Builds a union of one iterator per entry of Types plus Corpus.all().
172 std::unique_ptr<Iterator>
173 Dex::createTypeBoostingIterator(llvm::ArrayRef<std::string> Types)
const {
174 std::vector<std::unique_ptr<Iterator>> BoostingIterators;
175 SymbolRelevanceSignals PreferredTypeSignals;
176 PreferredTypeSignals.TypeMatchesPreferred =
true;
// Boost is evaluated once, before the loop, with TypeMatchesPreferred set.
177 auto Boost = PreferredTypeSignals.evaluate();
// The expression pushed for each type is on a line not shown in this excerpt;
// presumably it applies Boost to the iterator for that type -- TODO confirm.
178 for (
const auto &T : Types)
179 BoostingIterators.push_back(
// Corpus.all() is always part of the union.
181 BoostingIterators.push_back(Corpus.
all());
182 return Corpus.
unionOf(std::move(BoostingIterators));
// Precondition: the query text must not contain "::".
190 assert(!StringRef(Req.
Query).contains(
"::") &&
191 "There must be no :: in query.");
// More starts out true for non-empty queries shorter than three characters.
196 bool More = !Req.
Query.empty() && Req.
Query.size() < 3;
// Criteria collects the iterators built below; how they are combined into
// Root is on lines not shown in this excerpt.
198 std::vector<std::unique_ptr<Iterator>> Criteria;
// One index iterator per trigram token (TrigramTokens is produced on a line
// not shown here).
203 std::vector<std::unique_ptr<Iterator>> TrigramIterators;
204 for (
const auto &
Trigram : TrigramTokens)
205 TrigramIterators.push_back(iterator(
Trigram));
// One iterator per requested scope; the pushed expression is truncated in this
// excerpt.
209 std::vector<std::unique_ptr<Iterator>> ScopeIterators;
210 for (
const auto &Scope : Req.
Scopes)
213 ScopeIterators.push_back(
218 Criteria.push_back(createFileProximityIterator(Req.
ProximityPaths));
220 Criteria.push_back(createTypeBoostingIterator(Req.
PreferredTypes));
// NOTE(review): the condition guarding this criterion is not visible here.
223 Criteria.push_back(iterator(RestrictedForCodeCompletion));
234 vlog(
"Dex query tree: {0}", *Root);
236 using IDAndScore = std::pair<DocID, float>;
// consume() turns the query tree into (DocID, score) pairs.
237 std::vector<IDAndScore> IDAndScores =
consume(*Root);
// Comparator on the score component only: higher score compares first.
239 auto Compare = [](
const IDAndScore &LHS,
const IDAndScore &RHS) {
240 return LHS.second > RHS.second;
// Capacity is Req.Limit when set, otherwise the maximum size_t value.
242 TopN<IDAndScore, decltype(Compare)> Top(
243 Req.
Limit ? *Req.
Limit : std::numeric_limits<size_t>::max(), Compare);
// Note: the loop variable shadows the IDAndScore type alias declared above.
244 for (
const auto &IDAndScore : IDAndScores) {
245 const DocID SymbolDocID = IDAndScore.first;
246 const auto *Sym = Symbols[SymbolDocID];
// Filter.match yields an optional score; the handling of the no-value case is
// on a line not shown in this excerpt (Score is dereferenced below).
247 const llvm::Optional<float>
Score = Filter.match(Sym->
Name);
// Final score is the product of the filter score, the stored symbol quality
// and the score that came out of the iterator tree.
252 const float FinalScore =
253 (*Score) * SymbolQuality[SymbolDocID] * IDAndScore.second;
// The result of push() is tested; the action taken is not visible here.
256 if (Top.
push({SymbolDocID, FinalScore}))
// Drain the collected items; the loop body is on a line not shown here.
262 for (
const auto &Item : std::move(Top).items())
// Look up each requested ID in LookupTable. IDs that are not present fail the
// end() check; the action taken for found IDs is on a line not shown in this
// excerpt.
270 for (
const auto &ID : Req.
IDs) {
271 auto I = LookupTable.find(ID);
272 if (I != LookupTable.end())
// Tail of a const member function's parameter list: Callback takes a
// const Ref &. The start of the signature is not visible in this excerpt.
278 llvm::function_ref<
void(
const Ref &)>
Callback)
const {
// Falls back to the maximum uint32_t value when Req.Limit is unset; the
// variable receiving this value is declared on a line not shown here.
281 Req.
Limit.getValueOr(std::numeric_limits<uint32_t>::max());
// Visit every Ref stored for every requested ID; the loop body is not visible
// in this excerpt.
282 for (
const auto &ID : Req.
IDs)
283 for (
const auto &
Ref : Refs.lookup(ID)) {
// Falls back to the maximum uint32_t value when Req.Limit is unset; the
// variable receiving this value is declared on a line not shown here.
299 Req.
Limit.getValueOr(std::numeric_limits<uint32_t>::max());
// Relations is keyed by (Subject, predicate cast to uint8_t). Subject comes
// from an enclosing construct that is not visible in this excerpt.
302 auto It = Relations.find(
303 std::make_pair(Subject, static_cast<uint8_t>(Req.
Predicate)));
// Only keys that are present are iterated; the per-Object action is on a line
// not shown here.
304 if (It != Relations.end()) {
305 for (
const auto &
Object : It->second) {
// Sum up a byte estimate: the pointer array and the per-symbol float scores...
317 size_t Bytes = Symbols.size() *
sizeof(
const Symbol *);
318 Bytes += SymbolQuality.size() *
sizeof(float);
// ...the memory reported by the lookup table and the inverted index map...
319 Bytes += LookupTable.getMemorySize();
320 Bytes += InvertedIndex.getMemorySize();
// ...plus bytes() of every posting list, added on top of the map's own
// getMemorySize().
321 for (
const auto &TokenToPostingList : InvertedIndex)
322 Bytes += TokenToPostingList.second.bytes();
323 Bytes += Refs.getMemorySize();
324 Bytes += Relations.getMemorySize();
// BackingDataSize is added last. NOTE(review): presumably the size recorded
// when the index was created -- confirm where it is assigned.
325 return Bytes + BackingDataSize;
// Collects the generated URI strings.
329 std::vector<std::string> Result;
// Fragment of a message string naming generateProximityURIs(); the statement
// it belongs to and the rest of the text are on lines not shown here.
332 "Non-empty argument of generateProximityURIs() should be a valid "
334 llvm::StringRef Body = ParsedURI->body();
// The parsed URI itself is the first entry.
343 Result.emplace_back(ParsedURI->toString());
// Limit is pre-decremented and only when Body is non-empty (short-circuit).
// The loop ends once Body is empty or the decremented Limit is no longer
// positive.
344 while (!Body.empty() && --Limit > 0) {
// Replace Body with its parent path, using POSIX path rules.
347 Body = llvm::sys::path::parent_path(Body, llvm::sys::path::Style::posix);
// Rebuild a URI with the same scheme and authority and the shortened Body.
// The call that consumes this string begins on a line not shown here.
350 URI(ParsedURI->scheme(), ParsedURI->authority(), Body).
toString());