clang-tools  7.0.0
FuzzySymbolIndex.cpp
Go to the documentation of this file.
1 //===--- FuzzySymbolIndex.cpp - Lookup symbols for autocomplete -*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include "FuzzySymbolIndex.h"
10 #include "llvm/Support/Regex.h"
11 
13 using llvm::StringRef;
14 
15 namespace clang {
16 namespace include_fixer {
17 namespace {
18 
19 class MemSymbolIndex : public FuzzySymbolIndex {
20 public:
21  MemSymbolIndex(std::vector<SymbolAndSignals> Symbols) {
22  for (auto &Symbol : Symbols) {
23  auto Tokens = tokenize(Symbol.Symbol.getName());
24  this->Symbols.emplace_back(
25  StringRef(llvm::join(Tokens.begin(), Tokens.end(), " ")),
26  std::move(Symbol));
27  }
28  }
29 
30  std::vector<SymbolAndSignals> search(StringRef Query) override {
31  auto Tokens = tokenize(Query);
32  llvm::Regex Pattern("^" + queryRegexp(Tokens));
33  std::vector<SymbolAndSignals> Results;
34  for (const Entry &E : Symbols)
35  if (Pattern.match(E.first))
36  Results.push_back(E.second);
37  return Results;
38  }
39 
40 private:
41  using Entry = std::pair<llvm::SmallString<32>, SymbolAndSignals>;
42  std::vector<Entry> Symbols;
43 };
44 
// Helpers for tokenize state machine.
// TokenizeState names the kind of token that is currently pending, i.e. the
// characters read since the last flush.
enum TokenizeState {
  EMPTY,      // No pending characters.
  ONE_BIG,    // Read one uppercase letter, could be WORD or Word.
  BIG_WORD,   // Reading an uppercase WORD.
  SMALL_WORD, // Reading a lowercase word.
  NUMBER      // Reading a number.
};
53 
// Coarse character classes used by the tokenizer.
enum CharType { UPPER, LOWER, DIGIT, MISC };

// Maps a character to its CharType. Anything that is not an uppercase
// letter, a lowercase letter or a decimal digit is MISC.
CharType classify(char c) {
  // The <cctype> predicates require a value representable as unsigned char
  // (or EOF); passing a negative plain char is undefined behavior, so
  // convert first.
  const unsigned char UC = static_cast<unsigned char>(c);
  if (isupper(UC))
    return UPPER;
  if (islower(UC))
    return LOWER;
  if (isdigit(UC))
    return DIGIT;
  return MISC;
}
64 
65 } // namespace
66 
67 std::vector<std::string> FuzzySymbolIndex::tokenize(StringRef Text) {
68  std::vector<std::string> Result;
69  // State describes the treatment of text from Start to I.
70  // Once text is Flush()ed into Result, we're done with it and advance Start.
71  TokenizeState State = EMPTY;
72  size_t Start = 0;
73  auto Flush = [&](size_t End) {
74  if (State != EMPTY) {
75  Result.push_back(Text.substr(Start, End - Start).lower());
76  State = EMPTY;
77  }
78  Start = End;
79  };
80  for (size_t I = 0; I < Text.size(); ++I) {
81  CharType Type = classify(Text[I]);
82  if (Type == MISC)
83  Flush(I);
84  else if (Type == LOWER)
85  switch (State) {
86  case BIG_WORD:
87  Flush(I - 1); // FOOBar: first token is FOO, not FOOB.
88  LLVM_FALLTHROUGH;
89  case ONE_BIG:
90  State = SMALL_WORD;
91  LLVM_FALLTHROUGH;
92  case SMALL_WORD:
93  break;
94  default:
95  Flush(I);
96  State = SMALL_WORD;
97  }
98  else if (Type == UPPER)
99  switch (State) {
100  case ONE_BIG:
101  State = BIG_WORD;
102  LLVM_FALLTHROUGH;
103  case BIG_WORD:
104  break;
105  default:
106  Flush(I);
107  State = ONE_BIG;
108  }
109  else if (Type == DIGIT && State != NUMBER) {
110  Flush(I);
111  State = NUMBER;
112  }
113  }
114  Flush(Text.size());
115  return Result;
116 }
117 
118 std::string
119 FuzzySymbolIndex::queryRegexp(const std::vector<std::string> &Tokens) {
120  std::string Result;
121  for (size_t I = 0; I < Tokens.size(); ++I) {
122  if (I)
123  Result.append("[[:alnum:]]* ");
124  for (size_t J = 0; J < Tokens[I].size(); ++J) {
125  if (J)
126  Result.append("([[:alnum:]]* )?");
127  Result.push_back(Tokens[I][J]);
128  }
129  }
130  return Result;
131 }
132 
133 llvm::Expected<std::unique_ptr<FuzzySymbolIndex>>
134 FuzzySymbolIndex::createFromYAML(StringRef FilePath) {
135  auto Buffer = llvm::MemoryBuffer::getFile(FilePath);
136  if (!Buffer)
137  return llvm::errorCodeToError(Buffer.getError());
138  return llvm::make_unique<MemSymbolIndex>(
139  find_all_symbols::ReadSymbolInfosFromYAML(Buffer.get()->getBuffer()));
140 }
141 
142 } // namespace include_fixer
143 } // namespace clang
std::vector< CodeCompletionResult > Results
static llvm::Expected< std::unique_ptr< FuzzySymbolIndex > > createFromYAML(llvm::StringRef File)
std::vector< SymbolAndSignals > ReadSymbolInfosFromYAML(llvm::StringRef Yaml)
Read SymbolInfos from a YAML document.
Definition: SymbolInfo.cpp:129
StringRef Tokens
//===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
static std::string queryRegexp(const std::vector< std::string > &Tokens)
static std::string join(ArrayRef< SpecialMemberFunctionsCheck::SpecialMemberFunctionKind > SMFS, llvm::StringRef AndOr)
static std::vector< std::string > tokenize(llvm::StringRef Text)