17 #include "clang/Tooling/CompilationDatabase.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Support/Compression.h"
20 #include "llvm/Support/Endian.h"
21 #include "llvm/Support/Error.h"
22 #include "llvm/Support/raw_ostream.h"
29 return llvm::make_error<llvm::StringError>(Msg,
30 llvm::inconvertibleErrorCode());
44 const char *Begin, *End;
48 Reader(llvm::StringRef Data) : Begin(Data.begin()), End(Data.end()) {}
51 bool err()
const {
return Err; }
53 bool eof()
const {
return Begin == End || Err; }
55 llvm::StringRef rest()
const {
return llvm::StringRef(Begin, End - Begin); }
58 if (LLVM_UNLIKELY(Begin == End)) {
// Reads a 32-bit little-endian value located at Begin. The guard detects the
// case where fewer than 4 bytes remain before End.
65 uint32_t consume32() {
66 if (LLVM_UNLIKELY(Begin + 4 > End)) {
70 auto Ret = llvm::support::endian::read32le(Begin);
// Returns an N-byte view starting at Begin. When fewer than N bytes remain
// before End, an empty StringRef is returned instead.
// NOTE(review): N is a signed int and the visible lines do not reject negative
// values — confirm callers only pass non-negative lengths.
75 llvm::StringRef
consume(
int N) {
76 if (LLVM_UNLIKELY(Begin + N > End)) {
78 return llvm::StringRef();
80 llvm::StringRef Ret(Begin, N);
// Decodes a variable-length unsigned integer. The low 7 bits of each byte are
// payload; the top bit (More) marks that another byte follows.
85 uint32_t consumeVar() {
86 constexpr
static uint8_t More = 1 << 7;
87 uint8_t B = consume8();
// Likely case: the first byte has no continuation bit set.
88 if (LLVM_LIKELY(!(B & More)))
90 uint32_t Val = B & ~More;
// Loop while B has the continuation bit set and Shift stays below 32.
91 for (
int Shift = 7; B & More && Shift < 32; Shift += 7) {
// NOTE(review): (B & ~More) is promoted to int, so at Shift == 28 the shifted
// value can exceed the range of int — consider widening to uint32_t first.
93 Val |= (B & ~More) << Shift;
// Reads a varint index into Strings. An index that is not smaller than
// Strings.size() yields an empty StringRef.
98 llvm::StringRef consumeString(llvm::ArrayRef<llvm::StringRef>
Strings) {
99 auto StringIndex = consumeVar();
100 if (LLVM_UNLIKELY(StringIndex >=
Strings.size())) {
102 return llvm::StringRef();
// Encodes I in little-endian byte order into Buf and writes the whole buffer
// to OS.
113 void write32(uint32_t I, llvm::raw_ostream &
OS) {
115 llvm::support::endian::write32le(Buf, I);
116 OS.write(Buf,
sizeof(Buf));
// Writes I to OS in a variable-length encoding. More is the high bit of a
// byte (1 << 7); values below 1 << 7 take the likely fast path.
// NOTE(review): presumably the write-side counterpart of Reader::consumeVar —
// confirm against the lines not shown here.
119 void writeVar(uint32_t I, llvm::raw_ostream &
OS) {
120 constexpr
static uint8_t More = 1 << 7;
121 if (LLVM_LIKELY(I < 1 << 7)) {
// Collects strings for output and assigns each one a numeric index.
149 class StringTableOut {
// The distinct strings registered through intern().
150 llvm::DenseSet<llvm::StringRef> Unique;
// Filled from Unique by finalize(); a string's position here is its index.
151 std::vector<llvm::StringRef> Sorted;
// Maps a string's (data pointer, size) pair to its position in Sorted.
153 llvm::DenseMap<std::pair<const char *, size_t>,
unsigned>
Index;
162 void intern(llvm::StringRef &S) { S = *Unique.insert(S).first; };
// Freezes the table: copies Unique into Sorted, records the position of every
// string in Index, and builds the raw table that is emitted to OS.
164 void finalize(llvm::raw_ostream &
OS) {
165 Sorted = {Unique.begin(), Unique.end()};
// Index entries are keyed by (data pointer, size), not by string content.
167 for (
unsigned I = 0; I < Sorted.size(); ++I)
168 Index.try_emplace({Sorted[I].data(), Sorted[I].size()}, I);
// Concatenate all strings, each followed by a 0 byte.
170 std::string RawTable;
171 for (llvm::StringRef S : Sorted) {
// NOTE(review): std::string(S) creates a temporary copy per string;
// RawTable.append(S.data(), S.size()) would avoid it.
172 RawTable.append(std::string(S));
173 RawTable.push_back(0);
// With zlib available the table is compressed, and the uncompressed size is
// written to OS as a 32-bit value.
175 if (llvm::zlib::isAvailable()) {
176 llvm::SmallString<1> Compressed;
177 llvm::cantFail(llvm::zlib::compress(RawTable, Compressed));
178 write32(RawTable.size(),
OS);
// Returns the index recorded for S. The lookup uses S's data pointer and
// size, so S has to be the StringRef produced by intern(). Both preconditions
// (table finalized, string interned) are verified only by assert.
186 unsigned index(llvm::StringRef S)
const {
187 assert(!Sorted.empty() &&
"table not finalized");
188 assert(
Index.count({S.data(), S.size()}) &&
"string not interned");
189 return Index.find({S.data(), S.size()})->second;
// Result type of readStringTable().
193 struct StringTableIn {
// Parses a serialized string table from Data: a 32-bit uncompressed size
// followed by the string bytes, where every string ends with a 0 byte.
// Returns an error for truncated input or a string without a terminator.
198 llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) {
200 size_t UncompressedSize = R.consume32();
202 return makeError(
"Truncated string table");
204 llvm::StringRef Uncompressed;
205 llvm::SmallString<1> UncompressedStorage;
// A stored size of 0 means the remaining bytes are used as they are.
206 if (UncompressedSize == 0)
207 Uncompressed = R.rest();
// Otherwise the remaining bytes are decompressed into UncompressedStorage.
209 if (
llvm::Error E = llvm::zlib::uncompress(R.rest(), UncompressedStorage,
212 Uncompressed = UncompressedStorage;
// Strings are copied into Table.Arena through the saver.
216 llvm::StringSaver Saver(Table.Arena);
217 R = Reader(Uncompressed);
// NOTE(review): the loop declares its own Reader R, shadowing the R assigned
// on the previous line; an R.err() check placed after the loop would look at
// the outer reader, not at this one — confirm intent.
218 for (Reader R(Uncompressed); !R.eof();) {
// Length of the next string: distance to the next 0 byte.
219 auto Len = R.rest().find(0);
220 if (Len == llvm::StringRef::npos)
221 return makeError(
"Bad string table: not null terminated");
222 Table.Strings.push_back(Saver.save(R.consume(Len)));
226 return makeError(
"Truncated string table");
227 return std::move(Table);
// Serializes Loc to OS. For Loc.Start and then Loc.End, the line and the
// column are each written as a varint.
236 void writeLocation(
const SymbolLocation &
Loc,
const StringTableOut &
Strings,
237 llvm::raw_ostream &
OS) {
239 for (
const auto &Endpoint : {
Loc.Start,
Loc.End}) {
240 writeVar(Endpoint.line(),
OS);
241 writeVar(Endpoint.column(),
OS);
// Reads a SymbolLocation from Data: the file URI is resolved through Strings,
// then line and column are read as varints for Start and End, in that order.
245 SymbolLocation readLocation(Reader &Data,
246 llvm::ArrayRef<llvm::StringRef>
Strings) {
// NOTE(review): consumeString() returns a default StringRef for an
// out-of-range index, so FileURI may end up null on malformed input —
// presumably callers check Data.err(); confirm.
248 Loc.FileURI = Data.consumeString(
Strings).data();
249 for (
auto *Endpoint : {&
Loc.Start, &
Loc.End}) {
250 Endpoint->setLine(Data.consumeVar());
251 Endpoint->setColumn(Data.consumeVar());
// Reads one IncludeGraphNode from Data: a flags byte, the URI (resolved
// through Strings), IGN.Digest.size() raw digest bytes, and a varint count of
// direct includes followed by that many string references.
256 IncludeGraphNode readIncludeGraphNode(Reader &Data,
257 llvm::ArrayRef<llvm::StringRef>
Strings) {
258 IncludeGraphNode IGN;
259 IGN.Flags = static_cast<IncludeGraphNode::SourceFlag>(Data.consume8());
260 IGN.URI = Data.consumeString(
Strings);
// On truncated input consume() returns an empty StringRef, in which case the
// copy below writes nothing into IGN.Digest.
261 llvm::StringRef Digest = Data.consume(IGN.Digest.size());
262 std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin());
// NOTE(review): the element count comes straight from the input and resize()
// runs before any element is read.
263 IGN.DirectIncludes.resize(Data.consumeVar());
264 for (llvm::StringRef &Include : IGN.DirectIncludes)
265 Include = Data.consumeString(
Strings);
// Writes IGN to OS: the flags as a single byte, the digest bytes (viewed
// through Hash), the number of direct includes as a varint, and then one
// entry per include.
269 void writeIncludeGraphNode(
const IncludeGraphNode &IGN,
271 llvm::raw_ostream &
OS) {
272 OS.write(static_cast<uint8_t>(IGN.Flags));
274 llvm::StringRef Hash(reinterpret_cast<const char *>(IGN.Digest.data()),
277 writeVar(IGN.DirectIncludes.size(),
OS);
278 for (llvm::StringRef Include : IGN.DirectIncludes)
// Serializes Sym to OS. Enum-like fields are written as single bytes, counts
// as varints, and every string field as the varint index that Strings assigns
// to it, so all strings must have been interned beforehand.
// NOTE(review): the field order presumably has to mirror readSymbol() —
// confirm when changing either side.
282 void writeSymbol(
const Symbol &Sym,
const StringTableOut &
Strings,
283 llvm::raw_ostream &
OS) {
286 OS.write(static_cast<uint8_t>(Sym.SymInfo.Kind));
287 OS.write(static_cast<uint8_t>(Sym.SymInfo.Lang));
290 writeVar(
Strings.index(Sym.TemplateSpecializationArgs),
OS);
291 writeLocation(Sym.Definition,
Strings,
OS);
292 writeLocation(Sym.CanonicalDeclaration,
Strings,
OS);
293 writeVar(Sym.References,
OS);
294 OS.write(static_cast<uint8_t>(Sym.Flags));
295 OS.write(static_cast<uint8_t>(Sym.Origin));
296 writeVar(
Strings.index(Sym.Signature),
OS);
297 writeVar(
Strings.index(Sym.CompletionSnippetSuffix),
OS);
298 writeVar(
Strings.index(Sym.Documentation),
OS);
299 writeVar(
Strings.index(Sym.ReturnType),
OS);
// Each include is written as the header's string index plus its reference
// count.
302 auto WriteInclude = [&](
const Symbol::IncludeHeaderWithReferences &Include) {
303 writeVar(
Strings.index(Include.IncludeHeader),
OS);
304 writeVar(Include.References,
OS);
// The number of includes precedes the includes themselves.
306 writeVar(Sym.IncludeHeaders.size(),
OS);
307 for (
const auto &Include : Sym.IncludeHeaders)
308 WriteInclude(Include);
// Reads one Symbol from Data. Single-byte fields are cast to their enum
// types, counts are varints, and every string field is resolved through
// Strings by consumeString().
// NOTE(review): the field order presumably has to mirror writeSymbol() —
// confirm when changing either side.
311 Symbol readSymbol(Reader &Data, llvm::ArrayRef<llvm::StringRef>
Strings) {
313 Sym.ID = Data.consumeID();
314 Sym.SymInfo.Kind = static_cast<index::SymbolKind>(Data.consume8());
315 Sym.SymInfo.Lang = static_cast<index::SymbolLanguage>(Data.consume8());
316 Sym.Name = Data.consumeString(
Strings);
317 Sym.Scope = Data.consumeString(
Strings);
318 Sym.TemplateSpecializationArgs = Data.consumeString(
Strings);
319 Sym.Definition = readLocation(Data,
Strings);
320 Sym.CanonicalDeclaration = readLocation(Data,
Strings);
321 Sym.References = Data.consumeVar();
322 Sym.Flags = static_cast<Symbol::SymbolFlag>(Data.consume8());
323 Sym.Origin = static_cast<SymbolOrigin>(Data.consume8());
324 Sym.Signature = Data.consumeString(
Strings);
325 Sym.CompletionSnippetSuffix = Data.consumeString(
Strings);
326 Sym.Documentation = Data.consumeString(
Strings);
327 Sym.ReturnType = Data.consumeString(
Strings);
328 Sym.Type = Data.consumeString(
Strings);
// The include count is read from the input, then each entry is filled in.
329 Sym.IncludeHeaders.resize(Data.consumeVar());
330 for (
auto &I : Sym.IncludeHeaders) {
331 I.IncludeHeader = Data.consumeString(
Strings);
332 I.References = Data.consumeVar();
// Writes the references in Refs to OS; each one is emitted as a kind byte
// followed by its location.
344 void writeRefs(
const SymbolID &ID, llvm::ArrayRef<Ref>
Refs,
345 const StringTableOut &
Strings, llvm::raw_ostream &
OS) {
348 for (
const auto &Ref :
Refs) {
349 OS.write(static_cast<unsigned char>(Ref.Kind));
350 writeLocation(Ref.Location,
Strings,
OS);
// Reads a symbol ID and a varint count from Data, then that many references,
// each consisting of a kind byte and a location. Returns the ID paired with
// the references.
354 std::pair<SymbolID, std::vector<Ref>>
355 readRefs(Reader &Data, llvm::ArrayRef<llvm::StringRef>
Strings) {
356 std::pair<SymbolID, std::vector<Ref>> Result;
357 Result.first = Data.consumeID();
358 Result.second.resize(Data.consumeVar());
359 for (
auto &Ref : Result.second) {
360 Ref.Kind = static_cast<RefKind>(Data.consume8());
361 Ref.Location = readLocation(Data,
Strings);
// Writes R to OS as the raw subject ID, the predicate as one byte, and the
// raw object ID.
373 void writeRelation(
const Relation &R, llvm::raw_ostream &
OS) {
374 OS << R.Subject.raw();
375 OS.write(static_cast<uint8_t>(R.Predicate));
376 OS << R.Object.raw();
// Reads a relation from Data: the subject ID first, then the predicate byte.
// The result is built from Subject, Predicate and Object.
379 Relation readRelation(Reader &Data) {
380 SymbolID Subject = Data.consumeID();
381 RelationKind Predicate = static_cast<RelationKind>(Data.consume8());
383 return {Subject, Predicate,
Object};
// Compile command as used by writeCompileCommand() and readCompileCommand();
// both access its Directory and CommandLine members.
386 struct InternedCompileCommand {
// Writes Cmd to CmdOS: the string index of the directory, the number of
// command-line arguments, and the string index of every argument, all as
// varints.
391 void writeCompileCommand(
const InternedCompileCommand &Cmd,
393 llvm::raw_ostream &CmdOS) {
394 writeVar(
Strings.index(Cmd.Directory), CmdOS);
395 writeVar(Cmd.CommandLine.size(), CmdOS);
396 for (llvm::StringRef C : Cmd.CommandLine)
397 writeVar(
Strings.index(C), CmdOS);
// Reads a compile command: the directory string, a varint argument count, and
// that many argument strings, all resolved through Strings.
// NOTE(review): CmdReader is taken by value, so its position and error state
// after this call are not visible to the caller — confirm this is intended.
400 InternedCompileCommand
401 readCompileCommand(Reader CmdReader, llvm::ArrayRef<llvm::StringRef>
Strings) {
402 InternedCompileCommand Cmd;
403 Cmd.Directory = CmdReader.consumeString(
Strings);
404 Cmd.CommandLine.resize(CmdReader.consumeVar());
405 for (llvm::StringRef &C : Cmd.CommandLine)
406 C = CmdReader.consumeString(
Strings);
// Format version number. writeRIFF() emits it with write32(), and readRIFF()
// returns a "wrong version" error when the value it reads differs.
422 constexpr
static uint32_t Version = 13;
// Parses a RIFF-encoded index from Data. The "meta" chunk is checked for
// presence and its version compared against Version; the "stri" chunk is
// required. The "srcs", "symb", "refs", "rela" and "cmdl" chunks are each
// processed only when present. Returns an error for malformed input.
424 llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data) {
427 return RIFF.takeError();
// Index the chunks by ID for the lookups below.
430 llvm::StringMap<llvm::StringRef> Chunks;
431 for (
const auto &Chunk :
RIFF->Chunks)
432 Chunks.try_emplace(llvm::StringRef(Chunk.ID.data(), Chunk.ID.size()),
435 if (!Chunks.count(
"meta"))
437 Reader Meta(Chunks.lookup(
"meta"));
438 auto SeenVersion = Meta.consume32();
439 if (SeenVersion != Version)
440 return makeError(
"wrong version: want " + llvm::Twine(Version) +
", got " +
441 llvm::Twine(SeenVersion));
444 for (llvm::StringRef RequiredChunk : {
"stri"})
445 if (!Chunks.count(RequiredChunk))
446 return makeError(
"missing required chunk " + RequiredChunk);
// All later sections resolve their strings through this table.
448 auto Strings = readStringTable(Chunks.lookup(
"stri"));
453 if (Chunks.count(
"srcs")) {
454 Reader SrcsReader(Chunks.lookup(
"srcs"));
455 Result.Sources.emplace();
456 while (!SrcsReader.eof()) {
457 auto IGN = readIncludeGraphNode(SrcsReader,
Strings->Strings);
458 auto Entry = Result.Sources->try_emplace(IGN.URI).first;
459 Entry->getValue() = std::move(IGN);
// Re-point every include at the key stored in Result.Sources, inserting an
// entry for it when none exists yet.
463 for (
auto &Include :
Entry->getValue().DirectIncludes)
464 Include = Result.Sources->try_emplace(Include).first->getKey();
466 if (SrcsReader.err())
467 return makeError(
"malformed or truncated include uri");
470 if (Chunks.count(
"symb")) {
471 Reader SymbolReader(Chunks.lookup(
"symb"));
473 while (!SymbolReader.eof())
475 if (SymbolReader.err())
476 return makeError(
"malformed or truncated symbol");
477 Result.Symbols = std::move(
Symbols).build();
479 if (Chunks.count(
"refs")) {
480 Reader RefsReader(Chunks.lookup(
"refs"));
482 while (!RefsReader.eof()) {
483 auto RefsBundle = readRefs(RefsReader,
Strings->Strings);
484 for (
const auto &Ref : RefsBundle.second)
485 Refs.insert(RefsBundle.first, Ref);
487 if (RefsReader.err())
488 return makeError(
"malformed or truncated refs");
489 Result.Refs = std::move(
Refs).build();
491 if (Chunks.count(
"rela")) {
492 Reader RelationsReader(Chunks.lookup(
"rela"));
494 while (!RelationsReader.eof()) {
495 auto Relation = readRelation(RelationsReader);
496 Relations.
insert(Relation);
498 if (RelationsReader.err())
499 return makeError(
"malformed or truncated relations");
500 Result.Relations = std::move(Relations).build();
502 if (Chunks.count(
"cmdl")) {
503 Reader CmdReader(Chunks.lookup(
"cmdl"));
// NOTE(review): this error return appears before readCompileCommand() is
// called, and readCompileCommand() takes its Reader by value, so read errors
// raised inside it cannot be observed through CmdReader here — confirm.
505 return makeError(
"malformed or truncated commandline section");
506 InternedCompileCommand Cmd =
507 readCompileCommand(CmdReader,
Strings->Strings);
// Copy the command's strings into owned std::string storage in Result.Cmd.
508 Result.Cmd.emplace();
509 Result.Cmd->Directory = std::string(Cmd.Directory);
510 Result.Cmd->CommandLine.reserve(Cmd.CommandLine.size());
511 for (llvm::StringRef C : Cmd.CommandLine)
512 Result.Cmd->CommandLine.emplace_back(C);
514 return std::move(Result);
517 template <
class Callback>
520 for (llvm::StringRef &Include : IGN.DirectIncludes)
// Serializes Data to OS (RIFF format, going by the function name). Strings
// used by symbols, sources, refs and the compile command are interned into
// Strings first; each section is then rendered into its own buffer through a
// stream. Data.Symbols must be set (checked by assert).
524 void writeRIFF(
const IndexFileOut &Data, llvm::raw_ostream &
OS) {
525 assert(Data.Symbols &&
"An index file without symbols makes no sense!");
// The meta buffer receives the format version.
529 llvm::SmallString<4> Meta;
531 llvm::raw_svector_ostream MetaOS(Meta);
532 write32(Version, MetaOS);
538 for (
const auto &Sym : *Data.Symbols) {
541 [&](llvm::StringRef &S) {
Strings.intern(S); });
// Sources are copied so that the strings of the copies can be interned.
543 std::vector<IncludeGraphNode> Sources;
545 for (
const auto &Source : *Data.Sources) {
546 Sources.push_back(Source.getValue());
548 [&](llvm::StringRef &S) {
Strings.intern(S); });
551 std::vector<std::pair<SymbolID, std::vector<Ref>>>
Refs;
553 for (
const auto &Sym : *Data.Refs) {
554 Refs.emplace_back(Sym);
555 for (
auto &Ref :
Refs.back().second) {
// FileURI is viewed as a StringRef and then replaced by File.data()
// (presumably after interning in a line not shown — TODO confirm).
556 llvm::StringRef File = Ref.Location.FileURI;
558 Ref.Location.FileURI = File.data();
563 std::vector<Relation> Relations;
564 if (Data.Relations) {
565 for (
const auto &Relation : *Data.Relations) {
566 Relations.emplace_back(Relation);
// Build the interned form of the compile command.
571 InternedCompileCommand InternedCmd;
573 InternedCmd.CommandLine.reserve(Data.Cmd->CommandLine.size());
574 InternedCmd.Directory = Data.Cmd->Directory;
575 Strings.intern(InternedCmd.Directory);
576 for (llvm::StringRef C : Data.Cmd->CommandLine) {
577 InternedCmd.CommandLine.emplace_back(C);
578 Strings.intern(InternedCmd.CommandLine.back());
582 std::string StringSection;
584 llvm::raw_string_ostream StringOS(StringSection);
589 std::string SymbolSection;
591 llvm::raw_string_ostream SymbolOS(SymbolSection);
592 for (
const auto &Sym :
Symbols)
593 writeSymbol(Sym,
Strings, SymbolOS);
597 std::string RefsSection;
600 llvm::raw_string_ostream RefsOS(RefsSection);
601 for (
const auto &Sym :
Refs)
602 writeRefs(Sym.first, Sym.second,
Strings, RefsOS);
607 std::string RelationSection;
608 if (Data.Relations) {
610 llvm::raw_string_ostream RelationOS{RelationSection};
611 for (
const auto &Relation : Relations)
612 writeRelation(Relation, RelationOS);
617 std::string SrcsSection;
620 llvm::raw_string_ostream SrcsOS(SrcsSection);
621 for (
const auto &SF : Sources)
622 writeIncludeGraphNode(SF,
Strings, SrcsOS);
627 std::string CmdlSection;
630 llvm::raw_string_ostream CmdOS(CmdlSection);
631 writeCompileCommand(InternedCmd,
Strings, CmdOS);
// Declarations of the YAML writer and reader; only the prototypes appear
// here.
642 void writeYAML(
const IndexFileOut &, llvm::raw_ostream &);
643 llvm::Expected<IndexFileIn>
readYAML(llvm::StringRef);
658 if (Data.startswith(
"RIFF")) {
659 return readRIFF(Data);
660 }
else if (
auto YAMLContents =
readYAML(Data)) {
661 return std::move(*YAMLContents);
663 return makeError(
"Not a RIFF file and failed to parse as YAML: " +
// Loads the index stored in the file SymbolFilename. Failures to open or to
// parse the file are reported through elog(). Symbols, refs and relations are
// moved out of the parsed file, and a summary is logged through vlog().
668 std::unique_ptr<SymbolIndex>
loadIndex(llvm::StringRef SymbolFilename,
671 auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename);
673 elog(
"Can't open {0}: {1}", SymbolFilename, Buffer.getError().message());
684 Symbols = std::move(*I->Symbols);
686 Refs = std::move(*I->Refs);
688 Relations = std::move(*I->Relations);
690 elog(
"Bad index file: {0}", I.takeError());
// The relation count is captured here because Relations is moved from below.
697 size_t NumRelations = Relations.size();
701 std::move(Relations))
703 std::move(Relations));
704 vlog(
"Loaded {0} from {1} with estimated memory usage {2} bytes\n"
705 " - number of symbols: {3}\n"
706 " - number of refs: {4}\n"
707 " - number of relations: {5}",
708 UseDex ?
"Dex" :
"MemIndex", SymbolFilename,