clang-tools  7.0.0
Trigram.h
Go to the documentation of this file.
1 //===--- Trigram.h - Trigram generation for Fuzzy Matching ------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Trigrams are attributes of a symbol's unqualified name used to effectively
11 // extract symbols which can be fuzzy-matched against a user query from the
12 // inverted index. To match a query with the extracted set of trigrams Q, the
13 // set of generated trigrams T for an identifier (unqualified symbol name)
14 // should contain all items of Q, i.e. Q ⊆ T.
15 //
16 // Trigram sets extracted from an unqualified name and from a query differ:
17 // the set of query trigrams only contains consecutive sequences of three
18 // characters (which is only a subset of all trigrams generated for an
19 // identifier).
20 //
21 //===----------------------------------------------------------------------===//
22 
23 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TRIGRAM_H
24 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TRIGRAM_H
25 
26 #include "Token.h"
27 
28 #include <string>
29 
30 namespace clang {
31 namespace clangd {
32 namespace dex {
33 
34 /// Returns a list of unique fuzzy-search trigrams for an unqualified symbol.
35 ///
36 /// First, the given Identifier (unqualified symbol name) is segmented using
37 /// the FuzzyMatch API and lowercased. After segmentation, trigrams are
38 /// generated as follows: for each letter or digit in the input string, the
39 /// algorithm looks for the possible next and skip-1-next symbols which can be
40 /// jumped to during fuzzy matching. Each combination of three such symbols is
41 /// inserted into the result.
42 ///
43 /// Trigrams can start at any character in the input. Then we can choose to move
44 /// to the next character, move to the start of the next segment, or skip over a
45 /// segment.
46 ///
47 /// Note: the returned list of trigrams contains no duplicates; a trigram that
48 /// belongs to more than one class is inserted only once.
49 std::vector<Token> generateIdentifierTrigrams(llvm::StringRef Identifier);
50 
51 /// Returns a list of unique fuzzy-search trigrams for the given query.
52 ///
53 /// The Query is segmented using the FuzzyMatch API and converted to lowercase.
54 /// Then the simplest trigrams - sequences of three consecutive letters and
55 /// digits - are extracted and returned after deduplication.
56 std::vector<Token> generateQueryTrigrams(llvm::StringRef Query);
57 
58 } // namespace dex
59 } // namespace clangd
60 } // namespace clang
61 
62 #endif
std::vector< Token > generateIdentifierTrigrams(llvm::StringRef Identifier)
Returns list of unique fuzzy-search trigrams from unqualified symbol.
Definition: Trigram.cpp:34
std::vector< Token > generateQueryTrigrams(llvm::StringRef Query)
Returns list of unique fuzzy-search trigrams given a query.
Definition: Trigram.cpp:94
===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//