clang-tools  11.0.0
Dexp.cpp
Go to the documentation of this file.
1 //===--- Dexp.cpp - Dex EXPloration tool ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a simple interactive tool which can be used to manually
10 // evaluate symbol search quality of Clangd index.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "index/Serialization.h"
15 #include "index/dex/Dex.h"
16 #include "index/remote/Client.h"
17 #include "llvm/ADT/ScopeExit.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/LineEditor/LineEditor.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Support/Signals.h"
23 
24 namespace clang {
25 namespace clangd {
26 namespace {
27 
28 llvm::cl::opt<std::string> IndexLocation(
29  llvm::cl::desc("<path to index file | remote:server.address>"),
30  llvm::cl::Positional);
31 
32 llvm::cl::opt<std::string>
33  ExecCommand("c", llvm::cl::desc("Command to execute and then exit"));
34 
35 llvm::cl::opt<std::string> ProjectRoot("project-root",
36  llvm::cl::desc("Path to the project"));
37 
38 static constexpr char Overview[] = R"(
39 This is an **experimental** interactive tool to process user-provided search
40 queries over given symbol collection obtained via clangd-indexer. The
41 tool can be used to evaluate search quality of existing index implementations
42 and manually construct non-trivial test cases.
43 
44 You can connect to remote index by passing remote:address to dexp. Example:
45 
46 $ dexp remote:0.0.0.0:9000
47 
48 Type use "help" request to get information about the details.
49 )";
50 
51 void reportTime(llvm::StringRef Name, llvm::function_ref<void()> F) {
52  const auto TimerStart = std::chrono::high_resolution_clock::now();
53  F();
54  const auto TimerStop = std::chrono::high_resolution_clock::now();
55  const auto Duration = std::chrono::duration_cast<std::chrono::milliseconds>(
56  TimerStop - TimerStart);
57  llvm::outs() << llvm::formatv("{0} took {1:ms+n}.\n", Name, Duration);
58 }
59 
60 std::vector<SymbolID> getSymbolIDsFromIndex(llvm::StringRef QualifiedName,
61  const SymbolIndex *Index) {
62  FuzzyFindRequest Request;
63  // Remove leading "::" qualifier as FuzzyFind doesn't need leading "::"
64  // qualifier for global scope.
65  bool IsGlobalScope = QualifiedName.consume_front("::");
66  auto Names = splitQualifiedName(QualifiedName);
67  if (IsGlobalScope || !Names.first.empty())
68  Request.Scopes = {std::string(Names.first)};
69  else
70  // QualifiedName refers to a symbol in global scope (e.g. "GlobalSymbol"),
71  // add the global scope to the request.
72  Request.Scopes = {""};
73 
74  Request.Query = std::string(Names.second);
75  std::vector<SymbolID> SymIDs;
76  Index->fuzzyFind(Request, [&](const Symbol &Sym) {
77  std::string SymQualifiedName = (Sym.Scope + Sym.Name).str();
78  if (QualifiedName == SymQualifiedName)
79  SymIDs.push_back(Sym.ID);
80  });
81  return SymIDs;
82 }
83 
84 // REPL commands inherit from Command and contain their options as members.
85 // Creating a Command populates parser options, parseAndRun() resets them.
86 class Command {
87  // By resetting the parser options, we lost the standard -help flag.
88  llvm::cl::opt<bool, false, llvm::cl::parser<bool>> Help{
89  "help", llvm::cl::desc("Display available options"),
90  llvm::cl::ValueDisallowed, llvm::cl::cat(llvm::cl::GeneralCategory)};
91  // FIXME: Allow commands to signal failure.
92  virtual void run() = 0;
93 
94 protected:
95  const SymbolIndex *Index;
96 
97 public:
98  virtual ~Command() = default;
99  bool parseAndRun(llvm::ArrayRef<const char *> Argv, const char *Overview,
100  const SymbolIndex &Index) {
101  std::string ParseErrs;
102  llvm::raw_string_ostream OS(ParseErrs);
103  bool Ok = llvm::cl::ParseCommandLineOptions(Argv.size(), Argv.data(),
104  Overview, &OS);
105  // must do this before opts are destroyed
106  auto Cleanup = llvm::make_scope_exit(llvm::cl::ResetCommandLineParser);
107  if (Help.getNumOccurrences() > 0) {
108  // Avoid printing parse errors in this case.
109  // (Well, in theory. A bunch get printed to llvm::errs() regardless!)
110  llvm::cl::PrintHelpMessage();
111  return true;
112  }
113 
114  llvm::outs() << OS.str();
115  if (Ok) {
116  this->Index = &Index;
117  reportTime(Argv[0], [&] { run(); });
118  }
119  return Ok;
120  }
121 };
122 
123 // FIXME(kbobyrev): Ideas for more commands:
124 // * load/swap/reload index: this would make it possible to get rid of llvm::cl
125 // usages in the tool driver and actually use llvm::cl library in the REPL.
126 // * show posting list density histogram (or dump data somewhere so that user
127 // could build one)
128 // * show number of tokens of each kind
129 // * print out tokens with the most dense posting lists
130 // * print out tokens with least dense posting lists
131 
132 class FuzzyFind : public Command {
133  llvm::cl::opt<std::string> Query{
134  "query",
135  llvm::cl::Positional,
136  llvm::cl::Required,
137  llvm::cl::desc("Query string to be fuzzy-matched"),
138  };
139  llvm::cl::opt<std::string> Scopes{
140  "scopes",
141  llvm::cl::desc("Allowed symbol scopes (comma-separated list)"),
142  };
143  llvm::cl::opt<unsigned> Limit{
144  "limit",
145  llvm::cl::init(10),
146  llvm::cl::desc("Max results to display"),
147  };
148 
149  void run() override {
150  FuzzyFindRequest Request;
151  Request.Limit = Limit;
152  Request.Query = Query;
153  if (Scopes.getNumOccurrences() > 0) {
154  llvm::SmallVector<llvm::StringRef, 8> Scopes;
155  llvm::StringRef(this->Scopes).split(Scopes, ',');
156  Request.Scopes = {Scopes.begin(), Scopes.end()};
157  }
158  Request.AnyScope = Request.Scopes.empty();
159  // FIXME(kbobyrev): Print symbol final scores to see the distribution.
160  static const auto *OutputFormat = "{0,-4} | {1,-40} | {2,-25}\n";
161  llvm::outs() << llvm::formatv(OutputFormat, "Rank", "Symbol ID",
162  "Symbol Name");
163  size_t Rank = 0;
164  Index->fuzzyFind(Request, [&](const Symbol &Sym) {
165  llvm::outs() << llvm::formatv(OutputFormat, Rank++, Sym.ID.str(),
166  Sym.Scope + Sym.Name);
167  });
168  }
169 };
170 
171 class Lookup : public Command {
172  llvm::cl::opt<std::string> ID{
173  "id",
174  llvm::cl::Positional,
175  llvm::cl::desc("Symbol ID to look up (hex)"),
176  };
177  llvm::cl::opt<std::string> Name{
178  "name",
179  llvm::cl::desc("Qualified name to look up."),
180  };
181 
182  void run() override {
183  if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
184  llvm::outs()
185  << "Missing required argument: please provide id or -name.\n";
186  return;
187  }
188  std::vector<SymbolID> IDs;
189  if (ID.getNumOccurrences()) {
190  auto SID = SymbolID::fromStr(ID);
191  if (!SID) {
192  llvm::outs() << llvm::toString(SID.takeError()) << "\n";
193  return;
194  }
195  IDs.push_back(*SID);
196  } else {
197  IDs = getSymbolIDsFromIndex(Name, Index);
198  }
199 
200  LookupRequest Request;
201  Request.IDs.insert(IDs.begin(), IDs.end());
202  bool FoundSymbol = false;
203  Index->lookup(Request, [&](const Symbol &Sym) {
204  FoundSymbol = true;
205  llvm::outs() << toYAML(Sym);
206  });
207  if (!FoundSymbol)
208  llvm::outs() << "not found\n";
209  }
210 };
211 
212 class Refs : public Command {
213  llvm::cl::opt<std::string> ID{
214  "id",
215  llvm::cl::Positional,
216  llvm::cl::desc("Symbol ID of the symbol being queried (hex)."),
217  };
218  llvm::cl::opt<std::string> Name{
219  "name",
220  llvm::cl::desc("Qualified name of the symbol being queried."),
221  };
222  llvm::cl::opt<std::string> Filter{
223  "filter",
224  llvm::cl::init(".*"),
225  llvm::cl::desc(
226  "Print all results from files matching this regular expression."),
227  };
228 
229  void run() override {
230  if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
231  llvm::outs()
232  << "Missing required argument: please provide id or -name.\n";
233  return;
234  }
235  std::vector<SymbolID> IDs;
236  if (ID.getNumOccurrences()) {
237  auto SID = SymbolID::fromStr(ID);
238  if (!SID) {
239  llvm::outs() << llvm::toString(SID.takeError()) << "\n";
240  return;
241  }
242  IDs.push_back(*SID);
243  } else {
244  IDs = getSymbolIDsFromIndex(Name, Index);
245  if (IDs.size() > 1) {
246  llvm::outs() << llvm::formatv(
247  "The name {0} is ambiguous, found {1} different "
248  "symbols. Please use id flag to disambiguate.\n",
249  Name, IDs.size());
250  return;
251  }
252  }
253  RefsRequest RefRequest;
254  RefRequest.IDs.insert(IDs.begin(), IDs.end());
255  llvm::Regex RegexFilter(Filter);
256  Index->refs(RefRequest, [&RegexFilter](const Ref &R) {
257  auto U = URI::parse(R.Location.FileURI);
258  if (!U) {
259  llvm::outs() << U.takeError();
260  return;
261  }
262  if (RegexFilter.match(U->body()))
263  llvm::outs() << R << "\n";
264  });
265  }
266 };
267 
268 class Export : public Command {
269  llvm::cl::opt<IndexFileFormat> Format{
270  "format",
271  llvm::cl::desc("Format of index export"),
272  llvm::cl::values(
273  clEnumValN(IndexFileFormat::YAML, "yaml",
274  "human-readable YAML format"),
275  clEnumValN(IndexFileFormat::RIFF, "binary", "binary RIFF format")),
276  llvm::cl::init(IndexFileFormat::YAML),
277  };
278  llvm::cl::opt<std::string> OutputFile{
279  "output-file",
280  llvm::cl::Positional,
281  llvm::cl::Required,
282  llvm::cl::desc("Output file for export"),
283  };
284 
285 public:
286  void run() {
287  using namespace clang::clangd;
288  // Read input file (as specified in global option)
289  auto Buffer = llvm::MemoryBuffer::getFile(IndexLocation);
290  if (!Buffer) {
291  llvm::errs() << llvm::formatv("Can't open {0}", IndexLocation) << "\n";
292  return;
293  }
294 
295  // Auto-detects input format when parsing
296  auto IndexIn = clang::clangd::readIndexFile(Buffer->get()->getBuffer());
297  if (!IndexIn) {
298  llvm::errs() << llvm::toString(IndexIn.takeError()) << "\n";
299  return;
300  }
301 
302  // Prepare output file
303  std::error_code EC;
304  llvm::raw_fd_ostream OutputStream(OutputFile, EC);
305  if (EC) {
306  llvm::errs() << llvm::formatv("Can't open {0} for writing", OutputFile)
307  << "\n";
308  return;
309  }
310 
311  // Export
312  clang::clangd::IndexFileOut IndexOut(IndexIn.get());
313  IndexOut.Format = Format;
314  OutputStream << IndexOut;
315  }
316 };
317 
318 struct {
319  const char *Name;
320  const char *Description;
321  std::function<std::unique_ptr<Command>()> Implementation;
322 } CommandInfo[] = {
323  {"find", "Search for symbols with fuzzyFind", std::make_unique<FuzzyFind>},
324  {"lookup", "Dump symbol details by ID or qualified name",
325  std::make_unique<Lookup>},
326  {"refs", "Find references by ID or qualified name", std::make_unique<Refs>},
327  {"export", "Export index", std::make_unique<Export>},
328 };
329 
330 std::unique_ptr<SymbolIndex> openIndex(llvm::StringRef Index) {
331  return Index.startswith("remote:")
332  ? remote::getClient(Index.drop_front(strlen("remote:")),
333  ProjectRoot)
334  : loadIndex(Index, /*UseDex=*/true);
335 }
336 
337 bool runCommand(std::string Request, const SymbolIndex &Index) {
338  // Split on spaces and add required null-termination.
339  std::replace(Request.begin(), Request.end(), ' ', '\0');
340  llvm::SmallVector<llvm::StringRef, 8> Args;
341  llvm::StringRef(Request).split(Args, '\0', /*MaxSplit=*/-1,
342  /*KeepEmpty=*/false);
343  if (Args.empty())
344  return false;
345  if (Args.front() == "help") {
346  llvm::outs() << "dexp - Index explorer\nCommands:\n";
347  for (const auto &C : CommandInfo)
348  llvm::outs() << llvm::formatv("{0,16} - {1}\n", C.Name, C.Description);
349  llvm::outs() << "Get detailed command help with e.g. `find -help`.\n";
350  return true;
351  }
352  llvm::SmallVector<const char *, 8> FakeArgv;
353  for (llvm::StringRef S : Args)
354  FakeArgv.push_back(S.data()); // Terminated by separator or end of string.
355 
356  for (const auto &Cmd : CommandInfo) {
357  if (Cmd.Name == Args.front())
358  return Cmd.Implementation()->parseAndRun(FakeArgv, Cmd.Description,
359  Index);
360  }
361  llvm::outs() << "Unknown command. Try 'help'.\n";
362  return false;
363 }
364 
365 } // namespace
366 } // namespace clangd
367 } // namespace clang
368 
369 int main(int argc, const char *argv[]) {
370  using namespace clang::clangd;
371 
372  llvm::cl::ParseCommandLineOptions(argc, argv, Overview);
373  llvm::cl::ResetCommandLineParser(); // We reuse it for REPL commands.
374  llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
375 
376  std::unique_ptr<SymbolIndex> Index;
377  reportTime(llvm::StringRef(IndexLocation).startswith("remote:")
378  ? "Remote index client creation"
379  : "Dex build",
380  [&]() { Index = openIndex(IndexLocation); });
381 
382  if (!Index) {
383  llvm::outs() << "Failed to open the index.\n";
384  return -1;
385  }
386 
387  if (!ExecCommand.empty())
388  return runCommand(ExecCommand, *Index) ? 0 : 1;
389 
390  llvm::LineEditor LE("dexp");
391  while (llvm::Optional<std::string> Request = LE.readLine())
392  runCommand(std::move(*Request), *Index);
393 }
clang::clangd::IndexFileFormat::YAML
Client.h
Dex.h
Refs
RefSlab Refs
Definition: SymbolCollectorTests.cpp:296
clang::clangd::IndexFileFormat::RIFF
clang::clangd::splitQualifiedName
std::pair< StringRef, StringRef > splitQualifiedName(StringRef QName)
Definition: SourceCode.cpp:489
clang::clangd::readIndexFile
llvm::Expected< IndexFileIn > readIndexFile(llvm::StringRef Data)
Definition: Serialization.cpp:657
clang::clangd::IndexFileOut
Definition: Serialization.h:55
clang::clangd::URI::parse
static llvm::Expected< URI > parse(llvm::StringRef Uri)
Parse a URI string "<scheme>:[//<authority>/]<path>".
Definition: URI.cpp:163
clang::clangd
Definition: AST.cpp:39
Name
llvm::StringRef Name
Definition: CodeComplete.cpp:160
clang::clangd::IndexFileOut::Format
IndexFileFormat Format
Definition: Serialization.h:62
Implementation
std::function< std::unique_ptr< Command >()> Implementation
Definition: Dexp.cpp:321
clang::clangd::toYAML
std::string toYAML(const Symbol &)
Definition: YAMLSerialization.cpp:487
Description
const char * Description
Definition: Dexp.cpp:320
clang::clangd::loadIndex
std::unique_ptr< SymbolIndex > loadIndex(llvm::StringRef SymbolFilename, bool UseDex)
Definition: Serialization.cpp:668
clang::clangd::replace
static std::string replace(llvm::StringRef Haystack, llvm::StringRef Needle, llvm::StringRef Repl)
Definition: TestIndex.cpp:30
clang::clangd::remote::getClient
std::unique_ptr< clangd::SymbolIndex > getClient(llvm::StringRef Address, llvm::StringRef ProjectRoot)
Returns a SymbolIndex client that passes requests to remote index located at Address.
Definition: Client.cpp:115
Serialization.h
main
int main(int argc, const char *argv[])
Definition: Dexp.cpp:369
Index
const SymbolIndex * Index
Definition: Dexp.cpp:95
clang::clangd::SymbolIndex
Interface for symbol indexes that can be used for searching or matching symbols among a set of symbol...
Definition: Index.h:85
clang::clangd::SymbolID::fromStr
static llvm::Expected< SymbolID > fromStr(llvm::StringRef)
Definition: SymbolID.cpp:35
clang
===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
Definition: ApplyReplacements.h:27
OS
llvm::raw_string_ostream OS
Definition: TraceTests.cpp:162
clang::tidy::cppcoreguidelines::toString
static llvm::StringRef toString(SpecialMemberFunctionsCheck::SpecialMemberFunctionKind K)
Definition: SpecialMemberFunctionsCheck.cpp:60
clang::clangd::SymbolIndex::fuzzyFind
virtual bool fuzzyFind(const FuzzyFindRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const =0
Matches symbols in the index fuzzily and applies Callback on each matched symbol before returning.