clang-tools
9.0.0
llvm.src
tools
clang
tools
extra
clangd
index
dex
PostingList.h
Go to the documentation of this file.
1
//===--- PostingList.h - Symbol identifiers storage interface --*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// This defines posting list interface: a storage for identifiers of symbols
11
/// which can be characterized by a specific feature (such as fuzzy-find
12
/// trigram, scope, type or any other Search Token). Posting lists can be
13
/// traversed in order using an iterator and are values for inverted index,
14
/// which maps search tokens to corresponding posting lists.
15
///
16
/// In order to decrease size of Index in-memory representation, Variable Byte
17
/// Encoding (VByte) is used for PostingLists compression. An overview of VByte
18
/// algorithm can be found in "Introduction to Information Retrieval" book:
19
/// https://nlp.stanford.edu/IR-book/html/htmledition/variable-byte-codes-1.html
20
///
21
//===----------------------------------------------------------------------===//
22
23
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_POSTINGLIST_H
24
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_POSTINGLIST_H
25
26
#include "
Iterator.h
"
27
#include "llvm/ADT/ArrayRef.h"
28
#include "llvm/ADT/SmallVector.h"
29
#include <cstdint>
30
#include <vector>
31
32
namespace
clang
{
33
namespace
clangd {
34
namespace
dex {
35
class
Token;
36
37
/// NOTE: This is an implementation detail.
38
///
39
/// Chunk is a fixed-width piece of PostingList which contains the first DocID
40
/// in uncompressed format (Head) and delta-encoded Payload. It can be
41
/// decompressed upon request.
42
struct
Chunk
{
43
/// Keep sizeof(Chunk) == 32.
44
static
constexpr
size_t
PayloadSize
= 32 -
sizeof
(
DocID
);
45
46
llvm::SmallVector<DocID, PayloadSize + 1>
decompress
()
const
;
47
48
/// The first element of decompressed Chunk.
49
DocID
Head
;
50
/// VByte-encoded deltas.
51
std::array<uint8_t, PayloadSize>
Payload
;
52
};
53
static_assert(
sizeof
(
Chunk
) == 32,
"Chunk should take 32 bytes of memory."
);
54
55
/// PostingList is the storage of DocIDs which can be inserted to the Query
56
/// Tree as a leaf by constructing Iterator over the PostingList object. DocIDs
57
/// are stored in underlying chunks. Compression saves memory at a small cost
58
/// in access time, which is still fast enough in practice.
59
class
PostingList
{
60
public
:
61
explicit
PostingList
(llvm::ArrayRef<DocID> Documents);
62
63
/// Constructs DocumentIterator over given posting list. DocumentIterator will
64
/// go through the chunks and decompress them on-the-fly when necessary.
65
/// If given, Tok is only used for the string representation.
66
std::unique_ptr<Iterator> iterator(
const
Token
*Tok =
nullptr
)
const
;
67
68
/// Returns in-memory size of external storage.
69
size_t
bytes
()
const
{
return
Chunks.capacity() *
sizeof
(
Chunk
); }
70
71
private
:
72
const
std::vector<Chunk> Chunks;
73
};
74
75
}
// namespace dex
76
}
// namespace clangd
77
}
// namespace clang
78
79
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_POSTINGLIST_H
clang::clangd::dex::Chunk::Head
DocID Head
The first element of decompressed Chunk.
Definition:
PostingList.h:49
clang::clangd::dex::PostingList
PostingList is the storage of DocIDs which can be inserted to the Query Tree as a leaf by constructin...
Definition:
PostingList.h:59
clang::clangd::dex::PostingList::bytes
size_t bytes() const
Returns in-memory size of external storage.
Definition:
PostingList.h:69
clang::clangd::dex::Chunk::PayloadSize
static constexpr size_t PayloadSize
Keep sizeof(Chunk) == 32.
Definition:
PostingList.h:44
clang::clangd::dex::DocID
uint32_t DocID
Symbol position in the list of all index symbols sorted by a pre-computed symbol quality.
Definition:
Iterator.h:46
clang::clangd::dex::Chunk
NOTE: This is an implementation detail.
Definition:
PostingList.h:42
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition:
ApplyReplacements.h:27
Iterator.h
Symbol index queries consist of specific requirements for the requested symbol, such as high fuzzy ma...
clang::clangd::dex::Token
A Token represents an attribute of a symbol, such as a particular trigram present in the name (used f...
Definition:
Token.h:40
clang::clangd::dex::Chunk::Payload
std::array< uint8_t, PayloadSize > Payload
VByte-encoded deltas.
Definition:
PostingList.h:51
clang::clangd::dex::Chunk::decompress
llvm::SmallVector< DocID, PayloadSize+1 > decompress() const
Definition:
PostingList.cpp:205
Generated on Mon Jul 29 2019 15:12:11 for clang-tools by
1.8.13