clang-tools  11.0.0
Serialization.cpp
Go to the documentation of this file.
1 //===-- Serialization.cpp - Binary serialization of index data ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Serialization.h"
10 #include "Headers.h"
11 #include "RIFF.h"
12 #include "SymbolLocation.h"
13 #include "SymbolOrigin.h"
14 #include "dex/Dex.h"
15 #include "support/Logger.h"
16 #include "support/Trace.h"
17 #include "clang/Tooling/CompilationDatabase.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Support/Compression.h"
20 #include "llvm/Support/Endian.h"
21 #include "llvm/Support/Error.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include <vector>
24 
25 namespace clang {
26 namespace clangd {
27 namespace {
28 llvm::Error makeError(const llvm::Twine &Msg) {
29  return llvm::make_error<llvm::StringError>(Msg,
30  llvm::inconvertibleErrorCode());
31 }
32 
33 // IO PRIMITIVES
34 // We use little-endian 32 bit ints, sometimes with variable-length encoding.
35 //
36 // Variable-length int encoding (varint) uses the bottom 7 bits of each byte
37 // to encode the number, and the top bit to indicate whether more bytes follow.
38 // e.g. 9a 2f means [0x1a and keep reading, 0x2f and stop].
39 // This represents 0x1a | 0x2f<<7 = 6042.
40 // A 32-bit integer takes 1-5 bytes to encode; small numbers are more compact.
41 
42 // Reads binary data from a StringRef, and keeps track of position.
43 class Reader {
44  const char *Begin, *End;
45  bool Err = false;
46 
47 public:
48  Reader(llvm::StringRef Data) : Begin(Data.begin()), End(Data.end()) {}
49  // The "error" bit is set by reading past EOF or reading invalid data.
50  // When in an error state, reads may return zero values: callers should check.
51  bool err() const { return Err; }
52  // Did we read all the data, or encounter an error?
53  bool eof() const { return Begin == End || Err; }
54  // All the data we didn't read yet.
55  llvm::StringRef rest() const { return llvm::StringRef(Begin, End - Begin); }
56 
57  uint8_t consume8() {
58  if (LLVM_UNLIKELY(Begin == End)) {
59  Err = true;
60  return 0;
61  }
62  return *Begin++;
63  }
64 
65  uint32_t consume32() {
66  if (LLVM_UNLIKELY(Begin + 4 > End)) {
67  Err = true;
68  return 0;
69  }
70  auto Ret = llvm::support::endian::read32le(Begin);
71  Begin += 4;
72  return Ret;
73  }
74 
75  llvm::StringRef consume(int N) {
76  if (LLVM_UNLIKELY(Begin + N > End)) {
77  Err = true;
78  return llvm::StringRef();
79  }
80  llvm::StringRef Ret(Begin, N);
81  Begin += N;
82  return Ret;
83  }
84 
85  uint32_t consumeVar() {
86  constexpr static uint8_t More = 1 << 7;
87  uint8_t B = consume8();
88  if (LLVM_LIKELY(!(B & More)))
89  return B;
90  uint32_t Val = B & ~More;
91  for (int Shift = 7; B & More && Shift < 32; Shift += 7) {
92  B = consume8();
93  Val |= (B & ~More) << Shift;
94  }
95  return Val;
96  }
97 
98  llvm::StringRef consumeString(llvm::ArrayRef<llvm::StringRef> Strings) {
99  auto StringIndex = consumeVar();
100  if (LLVM_UNLIKELY(StringIndex >= Strings.size())) {
101  Err = true;
102  return llvm::StringRef();
103  }
104  return Strings[StringIndex];
105  }
106 
107  SymbolID consumeID() {
108  llvm::StringRef Raw = consume(SymbolID::RawSize); // short if truncated.
109  return LLVM_UNLIKELY(err()) ? SymbolID() : SymbolID::fromRaw(Raw);
110  }
111 };
112 
113 void write32(uint32_t I, llvm::raw_ostream &OS) {
114  char Buf[4];
115  llvm::support::endian::write32le(Buf, I);
116  OS.write(Buf, sizeof(Buf));
117 }
118 
119 void writeVar(uint32_t I, llvm::raw_ostream &OS) {
120  constexpr static uint8_t More = 1 << 7;
121  if (LLVM_LIKELY(I < 1 << 7)) {
122  OS.write(I);
123  return;
124  }
125  for (;;) {
126  OS.write(I | More);
127  I >>= 7;
128  if (I < 1 << 7) {
129  OS.write(I);
130  return;
131  }
132  }
133 }
134 
135 // STRING TABLE ENCODING
136 // Index data has many string fields, and many strings are identical.
137 // We store each string once, and refer to them by index.
138 //
139 // The string table's format is:
140 // - UncompressedSize : uint32 (or 0 for no compression)
141 // - CompressedData : byte[CompressedSize]
142 //
143 // CompressedData is a zlib-compressed byte[UncompressedSize].
144 // It contains a sequence of null-terminated strings, e.g. "foo\0bar\0".
145 // These are sorted to improve compression.
146 
147 // Maps each string to a canonical representation.
148 // Strings remain owned externally (e.g. by SymbolSlab).
149 class StringTableOut {
150  llvm::DenseSet<llvm::StringRef> Unique;
151  std::vector<llvm::StringRef> Sorted;
152  // Since strings are interned, look up can be by pointer.
153  llvm::DenseMap<std::pair<const char *, size_t>, unsigned> Index;
154 
155 public:
156  StringTableOut() {
157  // Ensure there's at least one string in the table.
158  // Table size zero is reserved to indicate no compression.
159  Unique.insert("");
160  }
161  // Add a string to the table. Overwrites S if an identical string exists.
162  void intern(llvm::StringRef &S) { S = *Unique.insert(S).first; };
163  // Finalize the table and write it to OS. No more strings may be added.
164  void finalize(llvm::raw_ostream &OS) {
165  Sorted = {Unique.begin(), Unique.end()};
166  llvm::sort(Sorted);
167  for (unsigned I = 0; I < Sorted.size(); ++I)
168  Index.try_emplace({Sorted[I].data(), Sorted[I].size()}, I);
169 
170  std::string RawTable;
171  for (llvm::StringRef S : Sorted) {
172  RawTable.append(std::string(S));
173  RawTable.push_back(0);
174  }
175  if (llvm::zlib::isAvailable()) {
176  llvm::SmallString<1> Compressed;
177  llvm::cantFail(llvm::zlib::compress(RawTable, Compressed));
178  write32(RawTable.size(), OS);
179  OS << Compressed;
180  } else {
181  write32(0, OS); // No compression.
182  OS << RawTable;
183  }
184  }
185  // Get the ID of an string, which must be interned. Table must be finalized.
186  unsigned index(llvm::StringRef S) const {
187  assert(!Sorted.empty() && "table not finalized");
188  assert(Index.count({S.data(), S.size()}) && "string not interned");
189  return Index.find({S.data(), S.size()})->second;
190  }
191 };
192 
193 struct StringTableIn {
194  llvm::BumpPtrAllocator Arena;
195  std::vector<llvm::StringRef> Strings;
196 };
197 
198 llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) {
199  Reader R(Data);
200  size_t UncompressedSize = R.consume32();
201  if (R.err())
202  return makeError("Truncated string table");
203 
204  llvm::StringRef Uncompressed;
205  llvm::SmallString<1> UncompressedStorage;
206  if (UncompressedSize == 0) // No compression
207  Uncompressed = R.rest();
208  else {
209  if (llvm::Error E = llvm::zlib::uncompress(R.rest(), UncompressedStorage,
210  UncompressedSize))
211  return std::move(E);
212  Uncompressed = UncompressedStorage;
213  }
214 
215  StringTableIn Table;
216  llvm::StringSaver Saver(Table.Arena);
217  R = Reader(Uncompressed);
218  for (Reader R(Uncompressed); !R.eof();) {
219  auto Len = R.rest().find(0);
220  if (Len == llvm::StringRef::npos)
221  return makeError("Bad string table: not null terminated");
222  Table.Strings.push_back(Saver.save(R.consume(Len)));
223  R.consume8();
224  }
225  if (R.err())
226  return makeError("Truncated string table");
227  return std::move(Table);
228 }
229 
230 // SYMBOL ENCODING
231 // Each field of clangd::Symbol is encoded in turn (see implementation).
232 // - StringRef fields encode as varint (index into the string table)
233 // - enums encode as the underlying type
234 // - most numbers encode as varint
235 
236 void writeLocation(const SymbolLocation &Loc, const StringTableOut &Strings,
237  llvm::raw_ostream &OS) {
238  writeVar(Strings.index(Loc.FileURI), OS);
239  for (const auto &Endpoint : {Loc.Start, Loc.End}) {
240  writeVar(Endpoint.line(), OS);
241  writeVar(Endpoint.column(), OS);
242  }
243 }
244 
245 SymbolLocation readLocation(Reader &Data,
246  llvm::ArrayRef<llvm::StringRef> Strings) {
247  SymbolLocation Loc;
248  Loc.FileURI = Data.consumeString(Strings).data();
249  for (auto *Endpoint : {&Loc.Start, &Loc.End}) {
250  Endpoint->setLine(Data.consumeVar());
251  Endpoint->setColumn(Data.consumeVar());
252  }
253  return Loc;
254 }
255 
256 IncludeGraphNode readIncludeGraphNode(Reader &Data,
257  llvm::ArrayRef<llvm::StringRef> Strings) {
258  IncludeGraphNode IGN;
259  IGN.Flags = static_cast<IncludeGraphNode::SourceFlag>(Data.consume8());
260  IGN.URI = Data.consumeString(Strings);
261  llvm::StringRef Digest = Data.consume(IGN.Digest.size());
262  std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin());
263  IGN.DirectIncludes.resize(Data.consumeVar());
264  for (llvm::StringRef &Include : IGN.DirectIncludes)
265  Include = Data.consumeString(Strings);
266  return IGN;
267 }
268 
269 void writeIncludeGraphNode(const IncludeGraphNode &IGN,
270  const StringTableOut &Strings,
271  llvm::raw_ostream &OS) {
272  OS.write(static_cast<uint8_t>(IGN.Flags));
273  writeVar(Strings.index(IGN.URI), OS);
274  llvm::StringRef Hash(reinterpret_cast<const char *>(IGN.Digest.data()),
275  IGN.Digest.size());
276  OS << Hash;
277  writeVar(IGN.DirectIncludes.size(), OS);
278  for (llvm::StringRef Include : IGN.DirectIncludes)
279  writeVar(Strings.index(Include), OS);
280 }
281 
282 void writeSymbol(const Symbol &Sym, const StringTableOut &Strings,
283  llvm::raw_ostream &OS) {
284  OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists,
285  // symbol IDs should probably be in a string table.
286  OS.write(static_cast<uint8_t>(Sym.SymInfo.Kind));
287  OS.write(static_cast<uint8_t>(Sym.SymInfo.Lang));
288  writeVar(Strings.index(Sym.Name), OS);
289  writeVar(Strings.index(Sym.Scope), OS);
290  writeVar(Strings.index(Sym.TemplateSpecializationArgs), OS);
291  writeLocation(Sym.Definition, Strings, OS);
292  writeLocation(Sym.CanonicalDeclaration, Strings, OS);
293  writeVar(Sym.References, OS);
294  OS.write(static_cast<uint8_t>(Sym.Flags));
295  OS.write(static_cast<uint8_t>(Sym.Origin));
296  writeVar(Strings.index(Sym.Signature), OS);
297  writeVar(Strings.index(Sym.CompletionSnippetSuffix), OS);
298  writeVar(Strings.index(Sym.Documentation), OS);
299  writeVar(Strings.index(Sym.ReturnType), OS);
300  writeVar(Strings.index(Sym.Type), OS);
301 
302  auto WriteInclude = [&](const Symbol::IncludeHeaderWithReferences &Include) {
303  writeVar(Strings.index(Include.IncludeHeader), OS);
304  writeVar(Include.References, OS);
305  };
306  writeVar(Sym.IncludeHeaders.size(), OS);
307  for (const auto &Include : Sym.IncludeHeaders)
308  WriteInclude(Include);
309 }
310 
311 Symbol readSymbol(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
312  Symbol Sym;
313  Sym.ID = Data.consumeID();
314  Sym.SymInfo.Kind = static_cast<index::SymbolKind>(Data.consume8());
315  Sym.SymInfo.Lang = static_cast<index::SymbolLanguage>(Data.consume8());
316  Sym.Name = Data.consumeString(Strings);
317  Sym.Scope = Data.consumeString(Strings);
318  Sym.TemplateSpecializationArgs = Data.consumeString(Strings);
319  Sym.Definition = readLocation(Data, Strings);
320  Sym.CanonicalDeclaration = readLocation(Data, Strings);
321  Sym.References = Data.consumeVar();
322  Sym.Flags = static_cast<Symbol::SymbolFlag>(Data.consume8());
323  Sym.Origin = static_cast<SymbolOrigin>(Data.consume8());
324  Sym.Signature = Data.consumeString(Strings);
325  Sym.CompletionSnippetSuffix = Data.consumeString(Strings);
326  Sym.Documentation = Data.consumeString(Strings);
327  Sym.ReturnType = Data.consumeString(Strings);
328  Sym.Type = Data.consumeString(Strings);
329  Sym.IncludeHeaders.resize(Data.consumeVar());
330  for (auto &I : Sym.IncludeHeaders) {
331  I.IncludeHeader = Data.consumeString(Strings);
332  I.References = Data.consumeVar();
333  }
334  return Sym;
335 }
336 
337 // REFS ENCODING
338 // A refs section has data grouped by Symbol. Each symbol has:
339 // - SymbolID: 8 bytes
340 // - NumRefs: varint
341 // - Ref[NumRefs]
342 // Fields of Ref are encoded in turn, see implementation.
343 
344 void writeRefs(const SymbolID &ID, llvm::ArrayRef<Ref> Refs,
345  const StringTableOut &Strings, llvm::raw_ostream &OS) {
346  OS << ID.raw();
347  writeVar(Refs.size(), OS);
348  for (const auto &Ref : Refs) {
349  OS.write(static_cast<unsigned char>(Ref.Kind));
350  writeLocation(Ref.Location, Strings, OS);
351  }
352 }
353 
354 std::pair<SymbolID, std::vector<Ref>>
355 readRefs(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
356  std::pair<SymbolID, std::vector<Ref>> Result;
357  Result.first = Data.consumeID();
358  Result.second.resize(Data.consumeVar());
359  for (auto &Ref : Result.second) {
360  Ref.Kind = static_cast<RefKind>(Data.consume8());
361  Ref.Location = readLocation(Data, Strings);
362  }
363  return Result;
364 }
365 
366 // RELATIONS ENCODING
367 // A relations section is a flat list of relations. Each relation has:
368 // - SymbolID (subject): 8 bytes
369 // - relation kind (predicate): 1 byte
370 // - SymbolID (object): 8 bytes
371 // In the future, we might prefer a packed representation if the need arises.
372 
373 void writeRelation(const Relation &R, llvm::raw_ostream &OS) {
374  OS << R.Subject.raw();
375  OS.write(static_cast<uint8_t>(R.Predicate));
376  OS << R.Object.raw();
377 }
378 
379 Relation readRelation(Reader &Data) {
380  SymbolID Subject = Data.consumeID();
381  RelationKind Predicate = static_cast<RelationKind>(Data.consume8());
382  SymbolID Object = Data.consumeID();
383  return {Subject, Predicate, Object};
384 }
385 
386 struct InternedCompileCommand {
387  llvm::StringRef Directory;
388  std::vector<llvm::StringRef> CommandLine;
389 };
390 
391 void writeCompileCommand(const InternedCompileCommand &Cmd,
392  const StringTableOut &Strings,
393  llvm::raw_ostream &CmdOS) {
394  writeVar(Strings.index(Cmd.Directory), CmdOS);
395  writeVar(Cmd.CommandLine.size(), CmdOS);
396  for (llvm::StringRef C : Cmd.CommandLine)
397  writeVar(Strings.index(C), CmdOS);
398 }
399 
400 InternedCompileCommand
401 readCompileCommand(Reader CmdReader, llvm::ArrayRef<llvm::StringRef> Strings) {
402  InternedCompileCommand Cmd;
403  Cmd.Directory = CmdReader.consumeString(Strings);
404  Cmd.CommandLine.resize(CmdReader.consumeVar());
405  for (llvm::StringRef &C : Cmd.CommandLine)
406  C = CmdReader.consumeString(Strings);
407  return Cmd;
408 }
409 
410 // FILE ENCODING
411 // A file is a RIFF chunk with type 'CdIx'.
412 // It contains the sections:
413 // - meta: version number
414 // - srcs: information related to include graph
415 // - stri: string table
416 // - symb: symbols
417 // - refs: references to symbols
418 
419 // The current versioning scheme is simple - non-current versions are rejected.
420 // If you make a breaking change, bump this version number to invalidate stored
421 // data. Later we may want to support some backward compatibility.
422 constexpr static uint32_t Version = 13;
423 
424 llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data) {
425  auto RIFF = riff::readFile(Data);
426  if (!RIFF)
427  return RIFF.takeError();
428  if (RIFF->Type != riff::fourCC("CdIx"))
429  return makeError("wrong RIFF filetype: " + riff::fourCCStr(RIFF->Type));
430  llvm::StringMap<llvm::StringRef> Chunks;
431  for (const auto &Chunk : RIFF->Chunks)
432  Chunks.try_emplace(llvm::StringRef(Chunk.ID.data(), Chunk.ID.size()),
433  Chunk.Data);
434 
435  if (!Chunks.count("meta"))
436  return makeError("missing meta chunk");
437  Reader Meta(Chunks.lookup("meta"));
438  auto SeenVersion = Meta.consume32();
439  if (SeenVersion != Version)
440  return makeError("wrong version: want " + llvm::Twine(Version) + ", got " +
441  llvm::Twine(SeenVersion));
442 
443  // meta chunk is checked above, as we prefer the "version mismatch" error.
444  for (llvm::StringRef RequiredChunk : {"stri"})
445  if (!Chunks.count(RequiredChunk))
446  return makeError("missing required chunk " + RequiredChunk);
447 
448  auto Strings = readStringTable(Chunks.lookup("stri"));
449  if (!Strings)
450  return Strings.takeError();
451 
452  IndexFileIn Result;
453  if (Chunks.count("srcs")) {
454  Reader SrcsReader(Chunks.lookup("srcs"));
455  Result.Sources.emplace();
456  while (!SrcsReader.eof()) {
457  auto IGN = readIncludeGraphNode(SrcsReader, Strings->Strings);
458  auto Entry = Result.Sources->try_emplace(IGN.URI).first;
459  Entry->getValue() = std::move(IGN);
460  // We change all the strings inside the structure to point at the keys in
461  // the map, since it is the only copy of the string that's going to live.
462  Entry->getValue().URI = Entry->getKey();
463  for (auto &Include : Entry->getValue().DirectIncludes)
464  Include = Result.Sources->try_emplace(Include).first->getKey();
465  }
466  if (SrcsReader.err())
467  return makeError("malformed or truncated include uri");
468  }
469 
470  if (Chunks.count("symb")) {
471  Reader SymbolReader(Chunks.lookup("symb"));
473  while (!SymbolReader.eof())
474  Symbols.insert(readSymbol(SymbolReader, Strings->Strings));
475  if (SymbolReader.err())
476  return makeError("malformed or truncated symbol");
477  Result.Symbols = std::move(Symbols).build();
478  }
479  if (Chunks.count("refs")) {
480  Reader RefsReader(Chunks.lookup("refs"));
482  while (!RefsReader.eof()) {
483  auto RefsBundle = readRefs(RefsReader, Strings->Strings);
484  for (const auto &Ref : RefsBundle.second) // FIXME: bulk insert?
485  Refs.insert(RefsBundle.first, Ref);
486  }
487  if (RefsReader.err())
488  return makeError("malformed or truncated refs");
489  Result.Refs = std::move(Refs).build();
490  }
491  if (Chunks.count("rela")) {
492  Reader RelationsReader(Chunks.lookup("rela"));
493  RelationSlab::Builder Relations;
494  while (!RelationsReader.eof()) {
495  auto Relation = readRelation(RelationsReader);
496  Relations.insert(Relation);
497  }
498  if (RelationsReader.err())
499  return makeError("malformed or truncated relations");
500  Result.Relations = std::move(Relations).build();
501  }
502  if (Chunks.count("cmdl")) {
503  Reader CmdReader(Chunks.lookup("cmdl"));
504  if (CmdReader.err())
505  return makeError("malformed or truncated commandline section");
506  InternedCompileCommand Cmd =
507  readCompileCommand(CmdReader, Strings->Strings);
508  Result.Cmd.emplace();
509  Result.Cmd->Directory = std::string(Cmd.Directory);
510  Result.Cmd->CommandLine.reserve(Cmd.CommandLine.size());
511  for (llvm::StringRef C : Cmd.CommandLine)
512  Result.Cmd->CommandLine.emplace_back(C);
513  }
514  return std::move(Result);
515 }
516 
517 template <class Callback>
518 void visitStrings(IncludeGraphNode &IGN, const Callback &CB) {
519  CB(IGN.URI);
520  for (llvm::StringRef &Include : IGN.DirectIncludes)
521  CB(Include);
522 }
523 
524 void writeRIFF(const IndexFileOut &Data, llvm::raw_ostream &OS) {
525  assert(Data.Symbols && "An index file without symbols makes no sense!");
526  riff::File RIFF;
527  RIFF.Type = riff::fourCC("CdIx");
528 
529  llvm::SmallString<4> Meta;
530  {
531  llvm::raw_svector_ostream MetaOS(Meta);
532  write32(Version, MetaOS);
533  }
534  RIFF.Chunks.push_back({riff::fourCC("meta"), Meta});
535 
536  StringTableOut Strings;
537  std::vector<Symbol> Symbols;
538  for (const auto &Sym : *Data.Symbols) {
539  Symbols.emplace_back(Sym);
540  visitStrings(Symbols.back(),
541  [&](llvm::StringRef &S) { Strings.intern(S); });
542  }
543  std::vector<IncludeGraphNode> Sources;
544  if (Data.Sources)
545  for (const auto &Source : *Data.Sources) {
546  Sources.push_back(Source.getValue());
547  visitStrings(Sources.back(),
548  [&](llvm::StringRef &S) { Strings.intern(S); });
549  }
550 
551  std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs;
552  if (Data.Refs) {
553  for (const auto &Sym : *Data.Refs) {
554  Refs.emplace_back(Sym);
555  for (auto &Ref : Refs.back().second) {
556  llvm::StringRef File = Ref.Location.FileURI;
557  Strings.intern(File);
558  Ref.Location.FileURI = File.data();
559  }
560  }
561  }
562 
563  std::vector<Relation> Relations;
564  if (Data.Relations) {
565  for (const auto &Relation : *Data.Relations) {
566  Relations.emplace_back(Relation);
567  // No strings to be interned in relations.
568  }
569  }
570 
571  InternedCompileCommand InternedCmd;
572  if (Data.Cmd) {
573  InternedCmd.CommandLine.reserve(Data.Cmd->CommandLine.size());
574  InternedCmd.Directory = Data.Cmd->Directory;
575  Strings.intern(InternedCmd.Directory);
576  for (llvm::StringRef C : Data.Cmd->CommandLine) {
577  InternedCmd.CommandLine.emplace_back(C);
578  Strings.intern(InternedCmd.CommandLine.back());
579  }
580  }
581 
582  std::string StringSection;
583  {
584  llvm::raw_string_ostream StringOS(StringSection);
585  Strings.finalize(StringOS);
586  }
587  RIFF.Chunks.push_back({riff::fourCC("stri"), StringSection});
588 
589  std::string SymbolSection;
590  {
591  llvm::raw_string_ostream SymbolOS(SymbolSection);
592  for (const auto &Sym : Symbols)
593  writeSymbol(Sym, Strings, SymbolOS);
594  }
595  RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection});
596 
597  std::string RefsSection;
598  if (Data.Refs) {
599  {
600  llvm::raw_string_ostream RefsOS(RefsSection);
601  for (const auto &Sym : Refs)
602  writeRefs(Sym.first, Sym.second, Strings, RefsOS);
603  }
604  RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
605  }
606 
607  std::string RelationSection;
608  if (Data.Relations) {
609  {
610  llvm::raw_string_ostream RelationOS{RelationSection};
611  for (const auto &Relation : Relations)
612  writeRelation(Relation, RelationOS);
613  }
614  RIFF.Chunks.push_back({riff::fourCC("rela"), RelationSection});
615  }
616 
617  std::string SrcsSection;
618  {
619  {
620  llvm::raw_string_ostream SrcsOS(SrcsSection);
621  for (const auto &SF : Sources)
622  writeIncludeGraphNode(SF, Strings, SrcsOS);
623  }
624  RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection});
625  }
626 
627  std::string CmdlSection;
628  if (Data.Cmd) {
629  {
630  llvm::raw_string_ostream CmdOS(CmdlSection);
631  writeCompileCommand(InternedCmd, Strings, CmdOS);
632  }
633  RIFF.Chunks.push_back({riff::fourCC("cmdl"), CmdlSection});
634  }
635 
636  OS << RIFF;
637 }
638 
639 } // namespace
640 
641 // Defined in YAMLSerialization.cpp.
642 void writeYAML(const IndexFileOut &, llvm::raw_ostream &);
643 llvm::Expected<IndexFileIn> readYAML(llvm::StringRef);
644 
645 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) {
646  switch (O.Format) {
648  writeRIFF(O, OS);
649  break;
651  writeYAML(O, OS);
652  break;
653  }
654  return OS;
655 }
656 
657 llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef Data) {
658  if (Data.startswith("RIFF")) {
659  return readRIFF(Data);
660  } else if (auto YAMLContents = readYAML(Data)) {
661  return std::move(*YAMLContents);
662  } else {
663  return makeError("Not a RIFF file and failed to parse as YAML: " +
664  llvm::toString(YAMLContents.takeError()));
665  }
666 }
667 
668 std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
669  bool UseDex) {
670  trace::Span OverallTracer("LoadIndex");
671  auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename);
672  if (!Buffer) {
673  elog("Can't open {0}: {1}", SymbolFilename, Buffer.getError().message());
674  return nullptr;
675  }
676 
678  RefSlab Refs;
679  RelationSlab Relations;
680  {
681  trace::Span Tracer("ParseIndex");
682  if (auto I = readIndexFile(Buffer->get()->getBuffer())) {
683  if (I->Symbols)
684  Symbols = std::move(*I->Symbols);
685  if (I->Refs)
686  Refs = std::move(*I->Refs);
687  if (I->Relations)
688  Relations = std::move(*I->Relations);
689  } else {
690  elog("Bad index file: {0}", I.takeError());
691  return nullptr;
692  }
693  }
694 
695  size_t NumSym = Symbols.size();
696  size_t NumRefs = Refs.numRefs();
697  size_t NumRelations = Relations.size();
698 
699  trace::Span Tracer("BuildIndex");
700  auto Index = UseDex ? dex::Dex::build(std::move(Symbols), std::move(Refs),
701  std::move(Relations))
702  : MemIndex::build(std::move(Symbols), std::move(Refs),
703  std::move(Relations));
704  vlog("Loaded {0} from {1} with estimated memory usage {2} bytes\n"
705  " - number of symbols: {3}\n"
706  " - number of refs: {4}\n"
707  " - number of relations: {5}",
708  UseDex ? "Dex" : "MemIndex", SymbolFilename,
709  Index->estimateMemoryUsage(), NumSym, NumRefs, NumRelations);
710  return Index;
711 }
712 
713 } // namespace clangd
714 } // namespace clang
clang::clangd::IndexFileFormat::YAML
clang::clangd::riff::fourCC
constexpr FourCC fourCC(const char(&Literal)[5])
Definition: RIFF.h:44
Dex.h
Headers.h
SymbolOrigin.h
E
const Expr * E
Definition: AvoidBindCheck.cpp:88
Refs
RefSlab Refs
Definition: SymbolCollectorTests.cpp:296
clang::clangd::IndexFileFormat::RIFF
SymbolLocation.h
Tracer
std::unique_ptr< trace::EventTracer > Tracer
Definition: TraceTests.cpp:163
clang::clangd::SymbolKind::Object
clang::clangd::dex::consume
std::vector< std::pair< DocID, float > > consume(Iterator &It)
Advances the iterator until it is exhausted.
Definition: Iterator.cpp:350
clang::clangd::RelationSlab::Builder::insert
void insert(const Relation &R)
Adds a relation to the slab.
Definition: Relation.h:73
clang::clangd::riff::fourCCStr
constexpr llvm::StringRef fourCCStr(const FourCC &Data)
Definition: RIFF.h:47
Trace.h
clang::clangd::dex::Dex::build
static std::unique_ptr< SymbolIndex > build(SymbolSlab, RefSlab, RelationSlab)
Builds an index from slabs. The index takes ownership of the slab.
Definition: Dex.cpp:26
clang::clangd::readIndexFile
llvm::Expected< IndexFileIn > readIndexFile(llvm::StringRef Data)
Definition: Serialization.cpp:657
clang::clangd::RefSlab
An efficient structure of storing large set of symbol references in memory.
Definition: Ref.h:104
clang::clangd::IndexFileOut
Definition: Serialization.h:55
clang::clangd::SymbolIndex::estimateMemoryUsage
virtual size_t estimateMemoryUsage() const =0
Returns estimated size of index (in bytes).
clang::clangd::RefSlab::numRefs
size_t numRefs() const
Definition: Ref.h:119
clang::clangd::readYAML
llvm::Expected< IndexFileIn > readYAML(llvm::StringRef)
Definition: YAMLSerialization.cpp:439
RIFF.h
clang::clangd::IndexFileOut::Format
IndexFileFormat Format
Definition: Serialization.h:62
clang::clangd::riff::readFile
llvm::Expected< File > readFile(llvm::StringRef Stream)
Definition: RIFF.cpp:55
clang::clangd::writeYAML
void writeYAML(const IndexFileOut &, llvm::raw_ostream &)
Definition: YAMLSerialization.cpp:406
clang::clangd::RelationSlab
Definition: Relation.h:45
Builder
CodeCompletionBuilder Builder
Definition: CodeCompletionStringsTests.cpp:35
Logger.h
CommandLine
std::vector< llvm::StringRef > CommandLine
Definition: Serialization.cpp:388
Directory
llvm::StringRef Directory
Definition: Serialization.cpp:387
clang::clangd::loadIndex
std::unique_ptr< SymbolIndex > loadIndex(llvm::StringRef SymbolFilename, bool UseDex)
Definition: Serialization.cpp:668
clang::clangd::SymbolID::RawSize
constexpr static size_t RawSize
Definition: SymbolID.h:48
Serialization.h
clang::clangd::vlog
void vlog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:67
clang::doc::SymbolID
std::array< uint8_t, 20 > SymbolID
Definition: Representation.h:30
clang::clangd::MemIndex::build
static std::unique_ptr< SymbolIndex > build(SymbolSlab Symbols, RefSlab Refs, RelationSlab Relations)
Builds an index from slabs. The index takes ownership of the data.
Definition: MemIndex.cpp:19
clang::clangd::operator<<
llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const CodeCompletion &C)
Definition: CodeComplete.cpp:1912
Entry
Definition: Modularize.cpp:429
Index
const SymbolIndex * Index
Definition: Dexp.cpp:95
clang::clangd::SymbolSlab::size
size_type size() const
Definition: Symbol.h:190
Strings
std::vector< llvm::StringRef > Strings
Definition: Serialization.cpp:195
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
OS
llvm::raw_string_ostream OS
Definition: TraceTests.cpp:162
Arena
llvm::BumpPtrAllocator Arena
Definition: Serialization.cpp:194
clang::tidy::cppcoreguidelines::toString
static llvm::StringRef toString(SpecialMemberFunctionsCheck::SpecialMemberFunctionKind K)
Definition: SpecialMemberFunctionsCheck.cpp:60
Symbols
SymbolSlab Symbols
Definition: SymbolCollectorTests.cpp:295
clang::clangd::RefSlab::size
size_t size() const
Gets the number of symbols.
Definition: Ref.h:118
Loc
SourceLocation Loc
'#' location in the include directive
Definition: IncludeOrderCheck.cpp:37
clang::clangd::Callback
llvm::unique_function< void(llvm::Expected< T >)> Callback
A Callback<T> is a void function that accepts Expected<T>.
Definition: Function.h:28
clang::clangd::visitStrings
void visitStrings(Symbol &S, const Callback &CB)
Invokes Callback with each StringRef& contained in the Symbol.
Definition: Symbol.h:147
clang::clangd::SymbolSlab
An immutable symbol container that stores a set of symbols.
Definition: Symbol.h:177
clang::clangd::elog
void elog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:56
clang::clangd::riff::makeError
static llvm::Error makeError(const llvm::Twine &Msg)
Definition: RIFF.cpp:17
clang::clangd::MessageType::Error
An error message.
clang::clangd::RelationKind
RelationKind
Definition: Relation.h:22
clang::clangd::trace::Span
Records an event whose duration is the lifetime of the Span object.
Definition: Trace.h:135