clang-tools  11.0.0
Iterator.cpp
Go to the documentation of this file.
1 //===--- Iterator.cpp - Query Symbol Retrieval ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Iterator.h"
10 #include "llvm/Support/Casting.h"
11 #include <algorithm>
12 #include <cassert>
13 #include <numeric>
14 
15 namespace clang {
16 namespace clangd {
17 namespace dex {
18 namespace {
19 
20 /// Implements Iterator over the intersection of other iterators.
21 ///
22 /// AndIterator iterates through common items among all children. It becomes
23 /// exhausted as soon as any child becomes exhausted. After each mutation, the
24 /// iterator restores the invariant: all children must point to the same item.
25 class AndIterator : public Iterator {
26 public:
27  explicit AndIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
28  : Iterator(Kind::And), Children(std::move(AllChildren)) {
29  assert(!Children.empty() && "AND iterator should have at least one child.");
30  // Establish invariants.
31  for (const auto &Child : Children)
32  ReachedEnd |= Child->reachedEnd();
33  sync();
34  // When children are sorted by the estimateSize(), sync() calls are more
35  // effective. Each sync() starts with the first child and makes sure all
36  // children point to the same element. If any child is "above" the previous
37  // ones, the algorithm resets and and advances the children to the next
38  // highest element starting from the front. When child iterators in the
39  // beginning have smaller estimated size, the sync() will have less restarts
40  // and become more effective.
41  llvm::sort(Children, [](const std::unique_ptr<Iterator> &LHS,
42  const std::unique_ptr<Iterator> &RHS) {
43  return LHS->estimateSize() < RHS->estimateSize();
44  });
45  }
46 
47  bool reachedEnd() const override { return ReachedEnd; }
48 
49  /// Advances all children to the next common item.
50  void advance() override {
51  assert(!reachedEnd() && "AND iterator can't advance() at the end.");
52  Children.front()->advance();
53  sync();
54  }
55 
56  /// Advances all children to the next common item with DocumentID >= ID.
57  void advanceTo(DocID ID) override {
58  assert(!reachedEnd() && "AND iterator can't advanceTo() at the end.");
59  Children.front()->advanceTo(ID);
60  sync();
61  }
62 
63  DocID peek() const override { return Children.front()->peek(); }
64 
65  float consume() override {
66  assert(!reachedEnd() && "AND iterator can't consume() at the end.");
67  float Boost = 1;
68  for (const auto &Child : Children)
69  Boost *= Child->consume();
70  return Boost;
71  }
72 
73  size_t estimateSize() const override {
74  return Children.front()->estimateSize();
75  }
76 
77 private:
78  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
79  OS << "(& ";
80  auto Separator = "";
81  for (const auto &Child : Children) {
82  OS << Separator << *Child;
83  Separator = " ";
84  }
85  OS << ')';
86  return OS;
87  }
88 
89  /// Restores class invariants: each child will point to the same element after
90  /// sync.
91  void sync() {
92  ReachedEnd |= Children.front()->reachedEnd();
93  if (ReachedEnd)
94  return;
95  auto SyncID = Children.front()->peek();
96  // Indicates whether any child needs to be advanced to new SyncID.
97  bool NeedsAdvance = false;
98  do {
99  NeedsAdvance = false;
100  for (auto &Child : Children) {
101  Child->advanceTo(SyncID);
102  ReachedEnd |= Child->reachedEnd();
103  // If any child reaches end And iterator can not match any other items.
104  // In this case, just terminate the process.
105  if (ReachedEnd)
106  return;
107  // If any child goes beyond given ID (i.e. ID is not the common item),
108  // all children should be advanced to the next common item.
109  if (Child->peek() > SyncID) {
110  SyncID = Child->peek();
111  NeedsAdvance = true;
112  }
113  }
114  } while (NeedsAdvance);
115  }
116 
117  /// AndIterator owns its children and ensures that all of them point to the
118  /// same element. As soon as one child gets exhausted, AndIterator can no
119  /// longer advance and has reached its end.
120  std::vector<std::unique_ptr<Iterator>> Children;
121  /// Indicates whether any child is exhausted. It is cheaper to maintain and
122  /// update the field, rather than traversing the whole subtree in each
123  /// reachedEnd() call.
124  bool ReachedEnd = false;
125  friend Corpus; // For optimizations.
126 };
127 
128 /// Implements Iterator over the union of other iterators.
129 ///
130 /// OrIterator iterates through all items which can be pointed to by at least
131 /// one child. To preserve the sorted order, this iterator always advances the
132 /// child with smallest Child->peek() value. OrIterator becomes exhausted as
133 /// soon as all of its children are exhausted.
134 class OrIterator : public Iterator {
135 public:
136  explicit OrIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
137  : Iterator(Kind::Or), Children(std::move(AllChildren)) {
138  assert(!Children.empty() && "OR iterator should have at least one child.");
139  }
140 
141  /// Returns true if all children are exhausted.
142  bool reachedEnd() const override {
143  for (const auto &Child : Children)
144  if (!Child->reachedEnd())
145  return false;
146  return true;
147  }
148 
149  /// Moves each child pointing to the smallest DocID to the next item.
150  void advance() override {
151  assert(!reachedEnd() && "OR iterator can't advance() at the end.");
152  const auto SmallestID = peek();
153  for (const auto &Child : Children)
154  if (!Child->reachedEnd() && Child->peek() == SmallestID)
155  Child->advance();
156  }
157 
158  /// Advances each child to the next existing element with DocumentID >= ID.
159  void advanceTo(DocID ID) override {
160  assert(!reachedEnd() && "OR iterator can't advanceTo() at the end.");
161  for (const auto &Child : Children)
162  if (!Child->reachedEnd())
163  Child->advanceTo(ID);
164  }
165 
166  /// Returns the element under cursor of the child with smallest Child->peek()
167  /// value.
168  DocID peek() const override {
169  assert(!reachedEnd() && "OR iterator can't peek() at the end.");
170  DocID Result = std::numeric_limits<DocID>::max();
171 
172  for (const auto &Child : Children)
173  if (!Child->reachedEnd())
174  Result = std::min(Result, Child->peek());
175 
176  return Result;
177  }
178 
179  // Returns the maximum boosting score among all Children when iterator
180  // points to the current ID.
181  float consume() override {
182  assert(!reachedEnd() && "OR iterator can't consume() at the end.");
183  const DocID ID = peek();
184  float Boost = 1;
185  for (const auto &Child : Children)
186  if (!Child->reachedEnd() && Child->peek() == ID)
187  Boost = std::max(Boost, Child->consume());
188  return Boost;
189  }
190 
191  size_t estimateSize() const override {
192  size_t Size = 0;
193  for (const auto &Child : Children)
194  Size = std::max(Size, Child->estimateSize());
195  return Size;
196  }
197 
198 private:
199  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
200  OS << "(| ";
201  auto Separator = "";
202  for (const auto &Child : Children) {
203  OS << Separator << *Child;
204  Separator = " ";
205  }
206  OS << ')';
207  return OS;
208  }
209 
210  // FIXME(kbobyrev): Would storing Children in min-heap be faster?
211  std::vector<std::unique_ptr<Iterator>> Children;
212  friend Corpus; // For optimizations.
213 };
214 
215 /// TrueIterator handles PostingLists which contain all items of the index. It
216 /// stores size of the virtual posting list, and all operations are performed
217 /// in O(1).
218 class TrueIterator : public Iterator {
219 public:
220  explicit TrueIterator(DocID Size) : Iterator(Kind::True), Size(Size) {}
221 
222  bool reachedEnd() const override { return Index >= Size; }
223 
224  void advance() override {
225  assert(!reachedEnd() && "TRUE iterator can't advance() at the end.");
226  ++Index;
227  }
228 
229  void advanceTo(DocID ID) override {
230  assert(!reachedEnd() && "TRUE iterator can't advanceTo() at the end.");
231  Index = std::min(ID, Size);
232  }
233 
234  DocID peek() const override {
235  assert(!reachedEnd() && "TRUE iterator can't peek() at the end.");
236  return Index;
237  }
238 
239  float consume() override {
240  assert(!reachedEnd() && "TRUE iterator can't consume() at the end.");
241  return 1;
242  }
243 
244  size_t estimateSize() const override { return Size; }
245 
246 private:
247  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
248  return OS << "true";
249  }
250 
251  DocID Index = 0;
252  /// Size of the underlying virtual PostingList.
253  DocID Size;
254 };
255 
256 /// FalseIterator yields no results.
257 class FalseIterator : public Iterator {
258 public:
259  FalseIterator() : Iterator(Kind::False) {}
260  bool reachedEnd() const override { return true; }
261  void advance() override { assert(false); }
262  void advanceTo(DocID ID) override { assert(false); }
263  DocID peek() const override {
264  assert(false);
265  return 0;
266  }
267  float consume() override {
268  assert(false);
269  return 1;
270  }
271  size_t estimateSize() const override { return 0; }
272 
273 private:
274  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
275  return OS << "false";
276  }
277 };
278 
279 /// Boost iterator is a wrapper around its child which multiplies scores of
280 /// each retrieved item by a given factor.
281 class BoostIterator : public Iterator {
282 public:
283  BoostIterator(std::unique_ptr<Iterator> Child, float Factor)
284  : Child(std::move(Child)), Factor(Factor) {}
285 
286  bool reachedEnd() const override { return Child->reachedEnd(); }
287 
288  void advance() override { Child->advance(); }
289 
290  void advanceTo(DocID ID) override { Child->advanceTo(ID); }
291 
292  DocID peek() const override { return Child->peek(); }
293 
294  float consume() override { return Child->consume() * Factor; }
295 
296  size_t estimateSize() const override { return Child->estimateSize(); }
297 
298 private:
299  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
300  return OS << "(* " << Factor << ' ' << *Child << ')';
301  }
302 
303  std::unique_ptr<Iterator> Child;
304  float Factor;
305 };
306 
307 /// This iterator limits the number of items retrieved from the child iterator
308 /// on top of the query tree. To ensure that query tree with LIMIT iterators
309 /// inside works correctly, users have to call Root->consume(Root->peek()) each
310 /// time item is retrieved at the root of query tree.
311 class LimitIterator : public Iterator {
312 public:
313  LimitIterator(std::unique_ptr<Iterator> Child, size_t Limit)
314  : Child(std::move(Child)), Limit(Limit), ItemsLeft(Limit) {}
315 
316  bool reachedEnd() const override {
317  return ItemsLeft == 0 || Child->reachedEnd();
318  }
319 
320  void advance() override { Child->advance(); }
321 
322  void advanceTo(DocID ID) override { Child->advanceTo(ID); }
323 
324  DocID peek() const override { return Child->peek(); }
325 
326  /// Decreases the limit in case the element consumed at top of the query tree
327  /// comes from the underlying iterator.
328  float consume() override {
329  assert(!reachedEnd() && "LimitIterator can't consume() at the end.");
330  --ItemsLeft;
331  return Child->consume();
332  }
333 
334  size_t estimateSize() const override {
335  return std::min(Child->estimateSize(), Limit);
336  }
337 
338 private:
339  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
340  return OS << "(LIMIT " << Limit << " " << *Child << ')';
341  }
342 
343  std::unique_ptr<Iterator> Child;
344  size_t Limit;
345  size_t ItemsLeft;
346 };
347 
348 } // end namespace
349 
350 std::vector<std::pair<DocID, float>> consume(Iterator &It) {
351  std::vector<std::pair<DocID, float>> Result;
352  for (; !It.reachedEnd(); It.advance())
353  Result.emplace_back(It.peek(), It.consume());
354  return Result;
355 }
356 
357 std::unique_ptr<Iterator>
358 Corpus::intersect(std::vector<std::unique_ptr<Iterator>> Children) const {
359  std::vector<std::unique_ptr<Iterator>> RealChildren;
360  for (auto &Child : Children) {
361  switch (Child->kind()) {
363  break; // No effect, drop the iterator.
365  return std::move(Child); // Intersection is empty.
366  case Iterator::Kind::And: {
367  // Inline nested AND into parent AND.
368  auto &NewChildren = static_cast<AndIterator *>(Child.get())->Children;
369  std::move(NewChildren.begin(), NewChildren.end(),
370  std::back_inserter(RealChildren));
371  break;
372  }
373  default:
374  RealChildren.push_back(std::move(Child));
375  }
376  }
377  switch (RealChildren.size()) {
378  case 0:
379  return all();
380  case 1:
381  return std::move(RealChildren.front());
382  default:
383  return std::make_unique<AndIterator>(std::move(RealChildren));
384  }
385 }
386 
387 std::unique_ptr<Iterator>
388 Corpus::unionOf(std::vector<std::unique_ptr<Iterator>> Children) const {
389  std::vector<std::unique_ptr<Iterator>> RealChildren;
390  for (auto &Child : Children) {
391  switch (Child->kind()) {
393  break; // No effect, drop the iterator.
394  case Iterator::Kind::Or: {
395  // Inline nested OR into parent OR.
396  auto &NewChildren = static_cast<OrIterator *>(Child.get())->Children;
397  std::move(NewChildren.begin(), NewChildren.end(),
398  std::back_inserter(RealChildren));
399  break;
400  }
402  // Don't return all(), which would discard sibling boosts.
403  default:
404  RealChildren.push_back(std::move(Child));
405  }
406  }
407  switch (RealChildren.size()) {
408  case 0:
409  return none();
410  case 1:
411  return std::move(RealChildren.front());
412  default:
413  return std::make_unique<OrIterator>(std::move(RealChildren));
414  }
415 }
416 
417 std::unique_ptr<Iterator> Corpus::all() const {
418  return std::make_unique<TrueIterator>(Size);
419 }
420 
421 std::unique_ptr<Iterator> Corpus::none() const {
422  return std::make_unique<FalseIterator>();
423 }
424 
425 std::unique_ptr<Iterator> Corpus::boost(std::unique_ptr<Iterator> Child,
426  float Factor) const {
427  if (Factor == 1)
428  return Child;
429  if (Child->kind() == Iterator::Kind::False)
430  return Child;
431  return std::make_unique<BoostIterator>(std::move(Child), Factor);
432 }
433 
434 std::unique_ptr<Iterator> Corpus::limit(std::unique_ptr<Iterator> Child,
435  size_t Limit) const {
436  if (Child->kind() == Iterator::Kind::False)
437  return Child;
438  return std::make_unique<LimitIterator>(std::move(Child), Limit);
439 }
440 
441 } // namespace dex
442 } // namespace clangd
443 } // namespace clang
clang::clangd::dex::Corpus::boost
std::unique_ptr< Iterator > boost(std::unique_ptr< Iterator > Child, float Factor) const
Returns BOOST iterator which multiplies the score of each item by given factor.
Definition: Iterator.cpp:425
clang::clangd::dex::Iterator::Kind::True
clang::clangd::dex::consume
std::vector< std::pair< DocID, float > > consume(Iterator &It)
Advances the iterator until it is exhausted.
Definition: Iterator.cpp:350
Kind
BindArgumentKind Kind
Definition: AvoidBindCheck.cpp:59
clang::clangd::dex::Corpus::all
std::unique_ptr< Iterator > all() const
Returns TRUE Iterator which iterates over "virtual" PostingList containing all items in range [0,...
Definition: Iterator.cpp:417
clang::clangd::dex::Corpus::intersect
std::unique_ptr< Iterator > intersect(std::vector< std::unique_ptr< Iterator >> Children) const
Returns AND Iterator which performs the intersection of the PostingLists of its children.
Definition: Iterator.cpp:358
clang::clangd::dex::Corpus::none
std::unique_ptr< Iterator > none() const
Returns FALSE Iterator which iterates over no documents.
Definition: Iterator.cpp:421
Children
std::vector< std::unique_ptr< HTMLNode > > Children
Definition: HTMLGenerator.cpp:91
clang::clangd::dex::Iterator::advance
virtual void advance()=0
Moves to next valid DocID.
clang::clangd::dex::Iterator::Kind::Or
clang::clangd::dex::Iterator::Kind::And
clang::clangd::Separator
Definition: FuzzyMatch.h:59
clang::clangd::dex::Iterator::peek
virtual DocID peek() const =0
Returns the current element this iterator points to.
Index
const SymbolIndex * Index
Definition: Dexp.cpp:95
Iterator.h
clang::clangd::dex::Iterator
Iterator is the interface for Query Tree node.
Definition: Iterator.h:54
clang::clangd::dex::Iterator::reachedEnd
virtual bool reachedEnd() const =0
Returns true if all valid DocIDs were processed and hence the iterator is exhausted.
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
OS
llvm::raw_string_ostream OS
Definition: TraceTests.cpp:162
clang::clangd::dex::DocID
uint32_t DocID
Symbol position in the list of all index symbols sorted by a pre-computed symbol quality.
Definition: Iterator.h:46
clang::clangd::dex::Corpus::limit
std::unique_ptr< Iterator > limit(std::unique_ptr< Iterator > Child, size_t Limit) const
Returns LIMIT iterator, which yields up to N elements of its child iterator.
Definition: Iterator.cpp:434
clang::clangd::dex::Iterator::consume
virtual float consume()=0
Informs the iterator that the current document was consumed, and returns its boost.
clang::clangd::dex::Corpus::unionOf
std::unique_ptr< Iterator > unionOf(std::vector< std::unique_ptr< Iterator >> Children) const
Returns OR Iterator which performs the union of the PostingLists of its children.
Definition: Iterator.cpp:388
clang::clangd::dex::Iterator::Kind::False