clang-tools  7.0.0
Iterator.h
Go to the documentation of this file.
1 //===--- Iterator.h - Query Symbol Retrieval --------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Symbol index queries consist of specific requirements for the requested
11 // symbol, such as high fuzzy matching score, scope, type etc. The lists of all
12 // symbols matching some criteria (e.g. belonging to "clang::clangd::" scope)
13 // are expressed in a form of Search Tokens which are stored in the inverted
14 // index. Inverted index maps these tokens to the posting lists - sorted (by
15 // symbol quality) sequences of symbol IDs matching the token, e.g. scope token
16 // "clang::clangd::" is mapped to the list of IDs of all symbols which are
17 // declared in this namespace. Search queries are built from a set of
18 // requirements which can be combined with each other forming the query trees.
19 // The leaves of such trees are posting lists, and the nodes are operations on
20 // these posting lists, e.g. intersection or union. Efficient processing of
21 // these multi-level queries is handled by Iterators. Iterators advance through
22 // all leaf posting lists producing the result of search query, which preserves
23 // the sorted order of IDs. Having the resulting IDs sorted is important,
24 // because it allows receiving a certain number of the most valuable items (e.g.
25 // symbols with highest quality which was the sorting key in the first place)
26 // without processing all items with requested properties (this might not be
27 // computationally efficient if the search request is not very restrictive).
28 //
29 //===----------------------------------------------------------------------===//
30 
31 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H
32 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H
33 
34 #include "llvm/ADT/ArrayRef.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include <algorithm>
37 #include <memory>
38 #include <vector>
39 
40 namespace clang {
41 namespace clangd {
42 namespace dex {
43 
44 /// Position of a symbol in the list of all index symbols, which is sorted by
45 /// a pre-computed symbol quality.
46 using DocID = uint32_t;
47 /// A sorted sequence of DocIDs, all of which belong to symbols matching a
48 /// certain criterion, i.e. containing a Search Token. PostingLists are the
49 /// values of the inverted index.
50 using PostingList = std::vector<DocID>;
51 /// Immutable reference to a PostingList object (an llvm::ArrayRef of DocIDs).
52 using PostingListRef = llvm::ArrayRef<DocID>;
53 
54 /// Iterator is the interface for a Query Tree node. The simplest type of
55 /// Iterator is DocumentIterator, which is simply a wrapper around a PostingList
56 /// iterator and serves as the Query Tree leaf. More sophisticated iterators
57 /// can manage the intersection or union of the elements produced by other
58 /// iterators (their children) to form a multi-level Query Tree. The interface
59 /// is designed to be extensible in order to support multiple iterator types.
60 class Iterator {
61  // FIXME(kbobyrev): Provide callback for matched documents.
62  // FIXME(kbobyrev): Implement new types of iterators: Label, Boost (with
63  // scoring), Limit.
64  // FIXME(kbobyrev): Implement iterator cost, an estimate of advance() calls
65  // before iterator exhaustion.
66 public:
67  /// Returns true if all valid DocIDs were processed and hence the iterator is
68  /// exhausted.
69  virtual bool reachedEnd() const = 0;
70  /// Moves to the next valid DocID. If there is none, the iterator becomes
71  /// exhausted and proceeds to the END.
72  ///
73  /// Note: reachedEnd() must be false.
74  virtual void advance() = 0;
75  /// Moves to the first valid DocID which is equal to or greater than ID. If
76  /// there is none, the iterator becomes exhausted and proceeds to the END.
77  ///
78  /// Note: reachedEnd() must be false.
79  virtual void advanceTo(DocID ID) = 0;
80  /// Returns the current element this iterator points to.
81  ///
82  /// Note: reachedEnd() must be false.
83  virtual DocID peek() const = 0;
84 
85  virtual ~Iterator() = default;
86 
87  /// Prints a human-readable iterator representation by delegating to the
88  /// private virtual dump(), which recursively dumps iterators in the format:
89  ///
90  /// (Type Child1 Child2 ...)
91  ///
92  /// Where Type is the iterator type representation: "&" for And, "|" for Or,
93  /// ChildN is the N-th iterator child. Raw iterators over PostingList are
94  /// represented as "[ID1, ID2, ...]" where IDN is the N-th PostingList entry.
95  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
96  const Iterator &It) {
97  return It.dump(OS);
98  }
99 
100 private:
101  virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0;
102 };
103 
104 /// Exhausts the given iterator and returns all processed DocIDs. The result
105 /// contains the DocIDs in sorted order.
106 std::vector<DocID> consume(Iterator &It);
107 
108 /// Returns a document iterator (a Query Tree leaf) over the given PostingList.
109 std::unique_ptr<Iterator> create(PostingListRef Documents);
110 
111 /// Returns an AND Iterator which performs the intersection of the PostingLists
112 /// of its children. Children is taken by value, i.e. ownership is handed over.
113 std::unique_ptr<Iterator>
114 createAnd(std::vector<std::unique_ptr<Iterator>> Children);
115 
116 /// Returns an OR Iterator which performs the union of the PostingLists of its
117 /// children. Children is taken by value, i.e. ownership is handed over.
118 std::unique_ptr<Iterator>
119 createOr(std::vector<std::unique_ptr<Iterator>> Children);
120 
121 /// Variadic convenience overload: allows createAnd(create(...), create(...)).
122 template <typename... Args> std::unique_ptr<Iterator> createAnd(Args... args) {
123  std::vector<std::unique_ptr<Iterator>> Children;
124  populateChildren(Children, args...); // Moves each argument into Children.
125  return createAnd(std::move(Children)); // Calls the std::vector overload.
126 }
127 
128 /// Variadic convenience overload: allows createOr(create(...), create(...)).
129 template <typename... Args> std::unique_ptr<Iterator> createOr(Args... args) {
130  std::vector<std::unique_ptr<Iterator>> Children;
131  populateChildren(Children, args...); // Moves each argument into Children.
132  return createOr(std::move(Children)); // Calls the std::vector overload.
133 }
134 /// Moves Head and then each element of Tail into Children, in argument order.
135 template <typename HeadT, typename... TailT>
136 void populateChildren(std::vector<std::unique_ptr<Iterator>> &Children,
137  HeadT &Head, TailT &... Tail) {
138  Children.push_back(std::move(Head)); // Head is left in a moved-from state.
139  populateChildren(Children, Tail...); // Recurse on the remaining arguments.
140 }
141 /// Single-argument overload (recursion base case): moves Head into Children.
142 template <typename HeadT>
143 void populateChildren(std::vector<std::unique_ptr<Iterator>> &Children,
144  HeadT &Head) {
145  Children.push_back(std::move(Head)); // Head is left in a moved-from state.
146 }
147 
148 } // namespace dex
149 } // namespace clangd
150 } // namespace clang
151 
152 #endif
std::vector< DocID > PostingList
Contains sorted sequence of DocIDs all of which belong to symbols matching certain criteria...
Definition: Iterator.h:50
std::vector< DocID > consume(Iterator &It)
Exhausts given iterator and returns all processed DocIDs.
Definition: Iterator.cpp:221
llvm::ArrayRef< DocID > PostingListRef
Immutable reference to PostingList object.
Definition: Iterator.h:52
Iterator is the interface for Query Tree node.
Definition: Iterator.h:60
virtual DocID peek() const =0
Returns the current element this iterator points to.
std::unique_ptr< Iterator > create(PostingListRef Documents)
Returns a document iterator over given PostingList.
Definition: Iterator.cpp:228
uint32_t DocID
Symbol position in the list of all index symbols sorted by a pre-computed symbol quality.
Definition: Iterator.h:46
virtual void advance()=0
Moves to next valid DocID.
std::unique_ptr< Iterator > createOr(std::vector< std::unique_ptr< Iterator >> Children)
Returns OR Iterator which performs the union of the PostingLists of its children. ...
Definition: Iterator.cpp:238
std::unique_ptr< Iterator > createAnd(std::vector< std::unique_ptr< Iterator >> Children)
Returns AND Iterator which performs the intersection of the PostingLists of its children.
Definition: Iterator.cpp:233
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
virtual void advanceTo(DocID ID)=0
Moves to the first valid DocID which is equal or higher than given ID.
virtual bool reachedEnd() const =0
Returns true if all valid DocIDs were processed and hence the iterator is exhausted.
void populateChildren(std::vector< std::unique_ptr< Iterator >> &Children, HeadT &Head, TailT &... Tail)
Definition: Iterator.h:136
friend llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const Iterator &Iterator)
Prints a convenient human-readable iterator representation by recursively dumping iterators in the fo...
Definition: Iterator.h:95