clang-tools  9.0.0
Serialization.cpp
Go to the documentation of this file.
1 //===-- Serialization.cpp - Binary serialization of index data ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Serialization.h"
10 #include "Headers.h"
11 #include "Logger.h"
12 #include "RIFF.h"
13 #include "SymbolLocation.h"
14 #include "SymbolOrigin.h"
15 #include "Trace.h"
16 #include "dex/Dex.h"
17 #include "clang/Tooling/CompilationDatabase.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Support/Compression.h"
20 #include "llvm/Support/Endian.h"
21 #include "llvm/Support/Error.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include <vector>
24 
25 namespace clang {
26 namespace clangd {
27 namespace {
28 llvm::Error makeError(const llvm::Twine &Msg) {
29  return llvm::make_error<llvm::StringError>(Msg,
30  llvm::inconvertibleErrorCode());
31 }
32 } // namespace
33 
35  // SymbolRole is used to record relations in the index.
36  // Only handle the relations we actually store currently.
37  // If we start storing more relations, this list can be expanded.
38  switch (Role) {
39  case index::SymbolRole::RelationBaseOf:
40  return RelationKind::BaseOf;
41  default:
42  llvm_unreachable("Unsupported symbol role");
43  }
44 }
45 
47  switch (Kind) {
49  return index::SymbolRole::RelationBaseOf;
50  }
51  llvm_unreachable("Invalid relation kind");
52 }
53 
54 namespace {
55 
56 // IO PRIMITIVES
57 // We use little-endian 32 bit ints, sometimes with variable-length encoding.
58 //
59 // Variable-length int encoding (varint) uses the bottom 7 bits of each byte
60 // to encode the number, and the top bit to indicate whether more bytes follow.
61 // e.g. 9a 2f means [0x1a and keep reading, 0x2f and stop].
62 // This represents 0x1a | 0x2f<<7 = 6042.
63 // A 32-bit integer takes 1-5 bytes to encode; small numbers are more compact.
64 
65 // Reads binary data from a StringRef, and keeps track of position.
66 class Reader {
67  const char *Begin, *End;
68  bool Err = false;
69 
70 public:
71  Reader(llvm::StringRef Data) : Begin(Data.begin()), End(Data.end()) {}
72  // The "error" bit is set by reading past EOF or reading invalid data.
73  // When in an error state, reads may return zero values: callers should check.
74  bool err() const { return Err; }
75  // Did we read all the data, or encounter an error?
76  bool eof() const { return Begin == End || Err; }
77  // All the data we didn't read yet.
78  llvm::StringRef rest() const { return llvm::StringRef(Begin, End - Begin); }
79 
80  uint8_t consume8() {
81  if (LLVM_UNLIKELY(Begin == End)) {
82  Err = true;
83  return 0;
84  }
85  return *Begin++;
86  }
87 
88  uint32_t consume32() {
89  if (LLVM_UNLIKELY(Begin + 4 > End)) {
90  Err = true;
91  return 0;
92  }
93  auto Ret = llvm::support::endian::read32le(Begin);
94  Begin += 4;
95  return Ret;
96  }
97 
98  llvm::StringRef consume(int N) {
99  if (LLVM_UNLIKELY(Begin + N > End)) {
100  Err = true;
101  return llvm::StringRef();
102  }
103  llvm::StringRef Ret(Begin, N);
104  Begin += N;
105  return Ret;
106  }
107 
108  uint32_t consumeVar() {
109  constexpr static uint8_t More = 1 << 7;
110  uint8_t B = consume8();
111  if (LLVM_LIKELY(!(B & More)))
112  return B;
113  uint32_t Val = B & ~More;
114  for (int Shift = 7; B & More && Shift < 32; Shift += 7) {
115  B = consume8();
116  Val |= (B & ~More) << Shift;
117  }
118  return Val;
119  }
120 
121  llvm::StringRef consumeString(llvm::ArrayRef<llvm::StringRef> Strings) {
122  auto StringIndex = consumeVar();
123  if (LLVM_UNLIKELY(StringIndex >= Strings.size())) {
124  Err = true;
125  return llvm::StringRef();
126  }
127  return Strings[StringIndex];
128  }
129 
130  SymbolID consumeID() {
131  llvm::StringRef Raw = consume(SymbolID::RawSize); // short if truncated.
132  return LLVM_UNLIKELY(err()) ? SymbolID() : SymbolID::fromRaw(Raw);
133  }
134 };
135 
136 void write32(uint32_t I, llvm::raw_ostream &OS) {
137  char Buf[4];
138  llvm::support::endian::write32le(Buf, I);
139  OS.write(Buf, sizeof(Buf));
140 }
141 
142 void writeVar(uint32_t I, llvm::raw_ostream &OS) {
143  constexpr static uint8_t More = 1 << 7;
144  if (LLVM_LIKELY(I < 1 << 7)) {
145  OS.write(I);
146  return;
147  }
148  for (;;) {
149  OS.write(I | More);
150  I >>= 7;
151  if (I < 1 << 7) {
152  OS.write(I);
153  return;
154  }
155  }
156 }
157 
158 // STRING TABLE ENCODING
159 // Index data has many string fields, and many strings are identical.
160 // We store each string once, and refer to them by index.
161 //
162 // The string table's format is:
163 // - UncompressedSize : uint32 (or 0 for no compression)
164 // - CompressedData : byte[CompressedSize]
165 //
166 // CompressedData is a zlib-compressed byte[UncompressedSize].
167 // It contains a sequence of null-terminated strings, e.g. "foo\0bar\0".
168 // These are sorted to improve compression.
169 
170 // Maps each string to a canonical representation.
171 // Strings remain owned externally (e.g. by SymbolSlab).
172 class StringTableOut {
173  llvm::DenseSet<llvm::StringRef> Unique;
174  std::vector<llvm::StringRef> Sorted;
175  // Since strings are interned, look up can be by pointer.
176  llvm::DenseMap<std::pair<const char *, size_t>, unsigned> Index;
177 
178 public:
179  StringTableOut() {
180  // Ensure there's at least one string in the table.
181  // Table size zero is reserved to indicate no compression.
182  Unique.insert("");
183  }
184  // Add a string to the table. Overwrites S if an identical string exists.
185  void intern(llvm::StringRef &S) { S = *Unique.insert(S).first; };
186  // Finalize the table and write it to OS. No more strings may be added.
187  void finalize(llvm::raw_ostream &OS) {
188  Sorted = {Unique.begin(), Unique.end()};
189  llvm::sort(Sorted);
190  for (unsigned I = 0; I < Sorted.size(); ++I)
191  Index.try_emplace({Sorted[I].data(), Sorted[I].size()}, I);
192 
193  std::string RawTable;
194  for (llvm::StringRef S : Sorted) {
195  RawTable.append(S);
196  RawTable.push_back(0);
197  }
198  if (llvm::zlib::isAvailable()) {
199  llvm::SmallString<1> Compressed;
200  llvm::cantFail(llvm::zlib::compress(RawTable, Compressed));
201  write32(RawTable.size(), OS);
202  OS << Compressed;
203  } else {
204  write32(0, OS); // No compression.
205  OS << RawTable;
206  }
207  }
208  // Get the ID of an string, which must be interned. Table must be finalized.
209  unsigned index(llvm::StringRef S) const {
210  assert(!Sorted.empty() && "table not finalized");
211  assert(Index.count({S.data(), S.size()}) && "string not interned");
212  return Index.find({S.data(), S.size()})->second;
213  }
214 };
215 
216 struct StringTableIn {
217  llvm::BumpPtrAllocator Arena;
218  std::vector<llvm::StringRef> Strings;
219 };
220 
221 llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) {
222  Reader R(Data);
223  size_t UncompressedSize = R.consume32();
224  if (R.err())
225  return makeError("Truncated string table");
226 
227  llvm::StringRef Uncompressed;
228  llvm::SmallString<1> UncompressedStorage;
229  if (UncompressedSize == 0) // No compression
230  Uncompressed = R.rest();
231  else {
232  if (llvm::Error E = llvm::zlib::uncompress(R.rest(), UncompressedStorage,
233  UncompressedSize))
234  return std::move(E);
235  Uncompressed = UncompressedStorage;
236  }
237 
238  StringTableIn Table;
239  llvm::StringSaver Saver(Table.Arena);
240  R = Reader(Uncompressed);
241  for (Reader R(Uncompressed); !R.eof();) {
242  auto Len = R.rest().find(0);
243  if (Len == llvm::StringRef::npos)
244  return makeError("Bad string table: not null terminated");
245  Table.Strings.push_back(Saver.save(R.consume(Len)));
246  R.consume8();
247  }
248  if (R.err())
249  return makeError("Truncated string table");
250  return std::move(Table);
251 }
252 
253 // SYMBOL ENCODING
254 // Each field of clangd::Symbol is encoded in turn (see implementation).
255 // - StringRef fields encode as varint (index into the string table)
256 // - enums encode as the underlying type
257 // - most numbers encode as varint
258 
259 void writeLocation(const SymbolLocation &Loc, const StringTableOut &Strings,
260  llvm::raw_ostream &OS) {
261  writeVar(Strings.index(Loc.FileURI), OS);
262  for (const auto &Endpoint : {Loc.Start, Loc.End}) {
263  writeVar(Endpoint.line(), OS);
264  writeVar(Endpoint.column(), OS);
265  }
266 }
267 
268 SymbolLocation readLocation(Reader &Data,
269  llvm::ArrayRef<llvm::StringRef> Strings) {
270  SymbolLocation Loc;
271  Loc.FileURI = Data.consumeString(Strings).data();
272  for (auto *Endpoint : {&Loc.Start, &Loc.End}) {
273  Endpoint->setLine(Data.consumeVar());
274  Endpoint->setColumn(Data.consumeVar());
275  }
276  return Loc;
277 }
278 
279 IncludeGraphNode readIncludeGraphNode(Reader &Data,
280  llvm::ArrayRef<llvm::StringRef> Strings) {
281  IncludeGraphNode IGN;
282  IGN.Flags = static_cast<IncludeGraphNode::SourceFlag>(Data.consume8());
283  IGN.URI = Data.consumeString(Strings);
284  llvm::StringRef Digest = Data.consume(IGN.Digest.size());
285  std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin());
286  IGN.DirectIncludes.resize(Data.consumeVar());
287  for (llvm::StringRef &Include : IGN.DirectIncludes)
288  Include = Data.consumeString(Strings);
289  return IGN;
290 }
291 
292 void writeIncludeGraphNode(const IncludeGraphNode &IGN,
293  const StringTableOut &Strings,
294  llvm::raw_ostream &OS) {
295  OS.write(static_cast<uint8_t>(IGN.Flags));
296  writeVar(Strings.index(IGN.URI), OS);
297  llvm::StringRef Hash(reinterpret_cast<const char *>(IGN.Digest.data()),
298  IGN.Digest.size());
299  OS << Hash;
300  writeVar(IGN.DirectIncludes.size(), OS);
301  for (llvm::StringRef Include : IGN.DirectIncludes)
302  writeVar(Strings.index(Include), OS);
303 }
304 
305 void writeSymbol(const Symbol &Sym, const StringTableOut &Strings,
306  llvm::raw_ostream &OS) {
307  OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists,
308  // symbol IDs should probably be in a string table.
309  OS.write(static_cast<uint8_t>(Sym.SymInfo.Kind));
310  OS.write(static_cast<uint8_t>(Sym.SymInfo.Lang));
311  writeVar(Strings.index(Sym.Name), OS);
312  writeVar(Strings.index(Sym.Scope), OS);
313  writeVar(Strings.index(Sym.TemplateSpecializationArgs), OS);
314  writeLocation(Sym.Definition, Strings, OS);
315  writeLocation(Sym.CanonicalDeclaration, Strings, OS);
316  writeVar(Sym.References, OS);
317  OS.write(static_cast<uint8_t>(Sym.Flags));
318  OS.write(static_cast<uint8_t>(Sym.Origin));
319  writeVar(Strings.index(Sym.Signature), OS);
320  writeVar(Strings.index(Sym.CompletionSnippetSuffix), OS);
321  writeVar(Strings.index(Sym.Documentation), OS);
322  writeVar(Strings.index(Sym.ReturnType), OS);
323  writeVar(Strings.index(Sym.Type), OS);
324 
325  auto WriteInclude = [&](const Symbol::IncludeHeaderWithReferences &Include) {
326  writeVar(Strings.index(Include.IncludeHeader), OS);
327  writeVar(Include.References, OS);
328  };
329  writeVar(Sym.IncludeHeaders.size(), OS);
330  for (const auto &Include : Sym.IncludeHeaders)
331  WriteInclude(Include);
332 }
333 
334 Symbol readSymbol(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
335  Symbol Sym;
336  Sym.ID = Data.consumeID();
337  Sym.SymInfo.Kind = static_cast<index::SymbolKind>(Data.consume8());
338  Sym.SymInfo.Lang = static_cast<index::SymbolLanguage>(Data.consume8());
339  Sym.Name = Data.consumeString(Strings);
340  Sym.Scope = Data.consumeString(Strings);
341  Sym.TemplateSpecializationArgs = Data.consumeString(Strings);
342  Sym.Definition = readLocation(Data, Strings);
343  Sym.CanonicalDeclaration = readLocation(Data, Strings);
344  Sym.References = Data.consumeVar();
345  Sym.Flags = static_cast<Symbol::SymbolFlag>(Data.consume8());
346  Sym.Origin = static_cast<SymbolOrigin>(Data.consume8());
347  Sym.Signature = Data.consumeString(Strings);
348  Sym.CompletionSnippetSuffix = Data.consumeString(Strings);
349  Sym.Documentation = Data.consumeString(Strings);
350  Sym.ReturnType = Data.consumeString(Strings);
351  Sym.Type = Data.consumeString(Strings);
352  Sym.IncludeHeaders.resize(Data.consumeVar());
353  for (auto &I : Sym.IncludeHeaders) {
354  I.IncludeHeader = Data.consumeString(Strings);
355  I.References = Data.consumeVar();
356  }
357  return Sym;
358 }
359 
360 // REFS ENCODING
361 // A refs section has data grouped by Symbol. Each symbol has:
362 // - SymbolID: 8 bytes
363 // - NumRefs: varint
364 // - Ref[NumRefs]
365 // Fields of Ref are encoded in turn, see implementation.
366 
367 void writeRefs(const SymbolID &ID, llvm::ArrayRef<Ref> Refs,
368  const StringTableOut &Strings, llvm::raw_ostream &OS) {
369  OS << ID.raw();
370  writeVar(Refs.size(), OS);
371  for (const auto &Ref : Refs) {
372  OS.write(static_cast<unsigned char>(Ref.Kind));
373  writeLocation(Ref.Location, Strings, OS);
374  }
375 }
376 
377 std::pair<SymbolID, std::vector<Ref>>
378 readRefs(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
379  std::pair<SymbolID, std::vector<Ref>> Result;
380  Result.first = Data.consumeID();
381  Result.second.resize(Data.consumeVar());
382  for (auto &Ref : Result.second) {
383  Ref.Kind = static_cast<RefKind>(Data.consume8());
384  Ref.Location = readLocation(Data, Strings);
385  }
386  return Result;
387 }
388 
389 // RELATIONS ENCODING
390 // A relations section is a flat list of relations. Each relation has:
391 // - SymbolID (subject): 8 bytes
392 // - relation kind (predicate): 1 byte
393 // - SymbolID (object): 8 bytes
394 // In the future, we might prefer a packed representation if the need arises.
395 
396 void writeRelation(const Relation &R, llvm::raw_ostream &OS) {
397  OS << R.Subject.raw();
399  OS.write(static_cast<uint8_t>(Kind));
400  OS << R.Object.raw();
401 }
402 
403 Relation readRelation(Reader &Data) {
404  SymbolID Subject = Data.consumeID();
405  index::SymbolRole Predicate =
406  relationKindToSymbolRole(static_cast<RelationKind>(Data.consume8()));
407  SymbolID Object = Data.consumeID();
408  return {Subject, Predicate, Object};
409 }
410 
411 struct InternedCompileCommand {
412  llvm::StringRef Directory;
413  std::vector<llvm::StringRef> CommandLine;
414 };
415 
416 void writeCompileCommand(const InternedCompileCommand &Cmd,
417  const StringTableOut &Strings,
418  llvm::raw_ostream &CmdOS) {
419  writeVar(Strings.index(Cmd.Directory), CmdOS);
420  writeVar(Cmd.CommandLine.size(), CmdOS);
421  for (llvm::StringRef C : Cmd.CommandLine)
422  writeVar(Strings.index(C), CmdOS);
423 }
424 
425 InternedCompileCommand
426 readCompileCommand(Reader CmdReader, llvm::ArrayRef<llvm::StringRef> Strings) {
427  InternedCompileCommand Cmd;
428  Cmd.Directory = CmdReader.consumeString(Strings);
429  Cmd.CommandLine.resize(CmdReader.consumeVar());
430  for (llvm::StringRef &C : Cmd.CommandLine)
431  C = CmdReader.consumeString(Strings);
432  return Cmd;
433 }
434 
435 // FILE ENCODING
436 // A file is a RIFF chunk with type 'CdIx'.
437 // It contains the sections:
438 // - meta: version number
439 // - srcs: information related to include graph
440 // - stri: string table
441 // - symb: symbols
442 // - refs: references to symbols
443 
444 // The current versioning scheme is simple - non-current versions are rejected.
445 // If you make a breaking change, bump this version number to invalidate stored
446 // data. Later we may want to support some backward compatibility.
447 constexpr static uint32_t Version = 12;
448 
449 llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data) {
450  auto RIFF = riff::readFile(Data);
451  if (!RIFF)
452  return RIFF.takeError();
453  if (RIFF->Type != riff::fourCC("CdIx"))
454  return makeError("wrong RIFF type");
455  llvm::StringMap<llvm::StringRef> Chunks;
456  for (const auto &Chunk : RIFF->Chunks)
457  Chunks.try_emplace(llvm::StringRef(Chunk.ID.data(), Chunk.ID.size()),
458  Chunk.Data);
459 
460  for (llvm::StringRef RequiredChunk : {"meta", "stri"})
461  if (!Chunks.count(RequiredChunk))
462  return makeError("missing required chunk " + RequiredChunk);
463 
464  Reader Meta(Chunks.lookup("meta"));
465  if (Meta.consume32() != Version)
466  return makeError("wrong version");
467 
468  auto Strings = readStringTable(Chunks.lookup("stri"));
469  if (!Strings)
470  return Strings.takeError();
471 
472  IndexFileIn Result;
473  if (Chunks.count("srcs")) {
474  Reader SrcsReader(Chunks.lookup("srcs"));
475  Result.Sources.emplace();
476  while (!SrcsReader.eof()) {
477  auto IGN = readIncludeGraphNode(SrcsReader, Strings->Strings);
478  auto Entry = Result.Sources->try_emplace(IGN.URI).first;
479  Entry->getValue() = std::move(IGN);
480  // We change all the strings inside the structure to point at the keys in
481  // the map, since it is the only copy of the string that's going to live.
482  Entry->getValue().URI = Entry->getKey();
483  for (auto &Include : Entry->getValue().DirectIncludes)
484  Include = Result.Sources->try_emplace(Include).first->getKey();
485  }
486  if (SrcsReader.err())
487  return makeError("malformed or truncated include uri");
488  }
489 
490  if (Chunks.count("symb")) {
491  Reader SymbolReader(Chunks.lookup("symb"));
493  while (!SymbolReader.eof())
494  Symbols.insert(readSymbol(SymbolReader, Strings->Strings));
495  if (SymbolReader.err())
496  return makeError("malformed or truncated symbol");
497  Result.Symbols = std::move(Symbols).build();
498  }
499  if (Chunks.count("refs")) {
500  Reader RefsReader(Chunks.lookup("refs"));
502  while (!RefsReader.eof()) {
503  auto RefsBundle = readRefs(RefsReader, Strings->Strings);
504  for (const auto &Ref : RefsBundle.second) // FIXME: bulk insert?
505  Refs.insert(RefsBundle.first, Ref);
506  }
507  if (RefsReader.err())
508  return makeError("malformed or truncated refs");
509  Result.Refs = std::move(Refs).build();
510  }
511  if (Chunks.count("rela")) {
512  Reader RelationsReader(Chunks.lookup("rela"));
514  while (!RelationsReader.eof()) {
515  auto Relation = readRelation(RelationsReader);
516  Relations.insert(Relation);
517  }
518  if (RelationsReader.err())
519  return makeError("malformed or truncated relations");
520  Result.Relations = std::move(Relations).build();
521  }
522  if (Chunks.count("cmdl")) {
523  Reader CmdReader(Chunks.lookup("cmdl"));
524  if (CmdReader.err())
525  return makeError("malformed or truncated commandline section");
526  InternedCompileCommand Cmd =
527  readCompileCommand(CmdReader, Strings->Strings);
528  Result.Cmd.emplace();
529  Result.Cmd->Directory = Cmd.Directory;
530  Result.Cmd->CommandLine.reserve(Cmd.CommandLine.size());
531  for (llvm::StringRef C : Cmd.CommandLine)
532  Result.Cmd->CommandLine.emplace_back(C);
533  }
534  return std::move(Result);
535 }
536 
537 template <class Callback>
538 void visitStrings(IncludeGraphNode &IGN, const Callback &CB) {
539  CB(IGN.URI);
540  for (llvm::StringRef &Include : IGN.DirectIncludes)
541  CB(Include);
542 }
543 
544 void writeRIFF(const IndexFileOut &Data, llvm::raw_ostream &OS) {
545  assert(Data.Symbols && "An index file without symbols makes no sense!");
546  riff::File RIFF;
547  RIFF.Type = riff::fourCC("CdIx");
548 
549  llvm::SmallString<4> Meta;
550  {
551  llvm::raw_svector_ostream MetaOS(Meta);
552  write32(Version, MetaOS);
553  }
554  RIFF.Chunks.push_back({riff::fourCC("meta"), Meta});
555 
556  StringTableOut Strings;
557  std::vector<Symbol> Symbols;
558  for (const auto &Sym : *Data.Symbols) {
559  Symbols.emplace_back(Sym);
560  visitStrings(Symbols.back(),
561  [&](llvm::StringRef &S) { Strings.intern(S); });
562  }
563  std::vector<IncludeGraphNode> Sources;
564  if (Data.Sources)
565  for (const auto &Source : *Data.Sources) {
566  Sources.push_back(Source.getValue());
567  visitStrings(Sources.back(),
568  [&](llvm::StringRef &S) { Strings.intern(S); });
569  }
570 
571  std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs;
572  if (Data.Refs) {
573  for (const auto &Sym : *Data.Refs) {
574  Refs.emplace_back(Sym);
575  for (auto &Ref : Refs.back().second) {
576  llvm::StringRef File = Ref.Location.FileURI;
577  Strings.intern(File);
578  Ref.Location.FileURI = File.data();
579  }
580  }
581  }
582 
583  std::vector<Relation> Relations;
584  if (Data.Relations) {
585  for (const auto &Relation : *Data.Relations) {
586  Relations.emplace_back(Relation);
587  // No strings to be interned in relations.
588  }
589  }
590 
591  InternedCompileCommand InternedCmd;
592  if (Data.Cmd) {
593  InternedCmd.CommandLine.reserve(Data.Cmd->CommandLine.size());
594  InternedCmd.Directory = Data.Cmd->Directory;
595  Strings.intern(InternedCmd.Directory);
596  for (llvm::StringRef C : Data.Cmd->CommandLine) {
597  InternedCmd.CommandLine.emplace_back(C);
598  Strings.intern(InternedCmd.CommandLine.back());
599  }
600  }
601 
602  std::string StringSection;
603  {
604  llvm::raw_string_ostream StringOS(StringSection);
605  Strings.finalize(StringOS);
606  }
607  RIFF.Chunks.push_back({riff::fourCC("stri"), StringSection});
608 
609  std::string SymbolSection;
610  {
611  llvm::raw_string_ostream SymbolOS(SymbolSection);
612  for (const auto &Sym : Symbols)
613  writeSymbol(Sym, Strings, SymbolOS);
614  }
615  RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection});
616 
617  std::string RefsSection;
618  if (Data.Refs) {
619  {
620  llvm::raw_string_ostream RefsOS(RefsSection);
621  for (const auto &Sym : Refs)
622  writeRefs(Sym.first, Sym.second, Strings, RefsOS);
623  }
624  RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
625  }
626 
627  std::string RelationSection;
628  if (Data.Relations) {
629  {
630  llvm::raw_string_ostream RelationOS{RelationSection};
631  for (const auto &Relation : Relations)
632  writeRelation(Relation, RelationOS);
633  }
634  RIFF.Chunks.push_back({riff::fourCC("rela"), RelationSection});
635  }
636 
637  std::string SrcsSection;
638  {
639  {
640  llvm::raw_string_ostream SrcsOS(SrcsSection);
641  for (const auto &SF : Sources)
642  writeIncludeGraphNode(SF, Strings, SrcsOS);
643  }
644  RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection});
645  }
646 
647  std::string CmdlSection;
648  if (Data.Cmd) {
649  {
650  llvm::raw_string_ostream CmdOS(CmdlSection);
651  writeCompileCommand(InternedCmd, Strings, CmdOS);
652  }
653  RIFF.Chunks.push_back({riff::fourCC("cmdl"), CmdlSection});
654  }
655 
656  OS << RIFF;
657 }
658 
659 } // namespace
660 
661 // Defined in YAMLSerialization.cpp.
662 void writeYAML(const IndexFileOut &, llvm::raw_ostream &);
663 llvm::Expected<IndexFileIn> readYAML(llvm::StringRef);
664 
665 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) {
666  switch (O.Format) {
668  writeRIFF(O, OS);
669  break;
671  writeYAML(O, OS);
672  break;
673  }
674  return OS;
675 }
676 
677 llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef Data) {
678  if (Data.startswith("RIFF")) {
679  return readRIFF(Data);
680  } else if (auto YAMLContents = readYAML(Data)) {
681  return std::move(*YAMLContents);
682  } else {
683  return makeError("Not a RIFF file and failed to parse as YAML: " +
684  llvm::toString(YAMLContents.takeError()));
685  }
686 }
687 
688 std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
689  bool UseDex) {
690  trace::Span OverallTracer("LoadIndex");
691  auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename);
692  if (!Buffer) {
693  elog("Can't open {0}", SymbolFilename);
694  return nullptr;
695  }
696 
698  RefSlab Refs;
700  {
701  trace::Span Tracer("ParseIndex");
702  if (auto I = readIndexFile(Buffer->get()->getBuffer())) {
703  if (I->Symbols)
704  Symbols = std::move(*I->Symbols);
705  if (I->Refs)
706  Refs = std::move(*I->Refs);
707  if (I->Relations)
708  Relations = std::move(*I->Relations);
709  } else {
710  elog("Bad Index: {0}", I.takeError());
711  return nullptr;
712  }
713  }
714 
715  size_t NumSym = Symbols.size();
716  size_t NumRefs = Refs.numRefs();
717  size_t NumRelations = Relations.size();
718 
719  trace::Span Tracer("BuildIndex");
720  auto Index = UseDex ? dex::Dex::build(std::move(Symbols), std::move(Refs),
721  std::move(Relations))
722  : MemIndex::build(std::move(Symbols), std::move(Refs),
723  std::move(Relations));
724  vlog("Loaded {0} from {1} with estimated memory usage {2} bytes\n"
725  " - number of symbols: {3}\n"
726  " - number of refs: {4}\n"
727  " - numnber of relations: {5}",
728  UseDex ? "Dex" : "MemIndex", SymbolFilename,
729  Index->estimateMemoryUsage(), NumSym, NumRefs, NumRelations);
730  return Index;
731 }
732 
733 } // namespace clangd
734 } // namespace clang
SourceLocation Loc
'#' location in the include directive
llvm::Expected< IndexFileIn > readYAML(llvm::StringRef)
An immutable symbol container that stores a set of symbols.
Definition: Symbol.h:177
static SymbolID fromRaw(llvm::StringRef)
Definition: SymbolID.cpp:26
std::unique_ptr< SymbolIndex > loadIndex(llvm::StringRef SymbolFilename, bool UseDex)
An efficient structure of storing large set of symbol references in memory.
Definition: Ref.h:69
static llvm::Error makeError(const char *Msg)
Definition: RIFF.cpp:16
This defines Dex - a symbol index implementation based on query iterators over symbol tokens...
void writeYAML(const IndexFileOut &, llvm::raw_ostream &)
RelationKind symbolRoleToRelationKind(index::SymbolRole Role)
clang::find_all_symbols::SymbolInfo::SymbolKind SymbolKind
Definition: SymbolInfo.cpp:21
constexpr FourCC fourCC(const char(&Literal)[5])
Definition: RIFF.h:44
static llvm::StringRef toString(SpecialMemberFunctionsCheck::SpecialMemberFunctionKind K)
llvm::unique_function< void(llvm::Expected< T >)> Callback
A Callback<T> is a void function that accepts Expected<T>.
Definition: Function.h:28
llvm::Expected< File > readFile(llvm::StringRef Stream)
Definition: RIFF.cpp:51
size_t numRefs() const
Definition: Ref.h:84
llvm::Expected< IndexFileIn > readIndexFile(llvm::StringRef Data)
void vlog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:67
void elog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:56
std::vector< llvm::StringRef > CommandLine
static constexpr size_t RawSize
Definition: SymbolID.h:45
BindArgumentKind Kind
llvm::BumpPtrAllocator Arena
std::vector< std::pair< DocID, float > > consume(Iterator &It)
Advances the iterator until it is exhausted.
Definition: Iterator.cpp:350
static std::unique_ptr< SymbolIndex > build(SymbolSlab Symbols, RefSlab Refs, RelationSlab Relations)
Builds an index from slabs. The index takes ownership of the data.
Definition: MemIndex.cpp:19
static std::unique_ptr< SymbolIndex > build(SymbolSlab, RefSlab, RelationSlab)
Builds an index from slabs. The index takes ownership of the slab.
Definition: Dex.cpp:26
index::SymbolRoleSet Role
Definition: XRefs.cpp:375
RelationSlab Relations
RefKind
Describes the kind of a cross-reference.
Definition: Ref.h:28
SymbolSlab Symbols
llvm::StringRef Directory
std::vector< llvm::StringRef > Strings
CodeCompletionBuilder Builder
index::SymbolRole relationKindToSymbolRole(RelationKind Kind)
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
llvm::Optional< llvm::Expected< tooling::AtomicChanges > > Result
Definition: Rename.cpp:36
size_t size() const
Definition: Relation.h:53
RefSlab Refs
llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const CodeCompletion &C)
Records an event whose duration is the lifetime of the Span object.
Definition: Trace.h:82
size_t size() const
Definition: Symbol.h:189
std::array< uint8_t, 20 > SymbolID
void visitStrings(Symbol &S, const Callback &CB)
Invokes Callback with each StringRef& contained in the Symbol.
Definition: Symbol.h:147
const SymbolIndex * Index
Definition: Dexp.cpp:84