17 #include "clang/Tooling/CompilationDatabase.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/Support/Compression.h" 20 #include "llvm/Support/Endian.h" 21 #include "llvm/Support/Error.h" 22 #include "llvm/Support/raw_ostream.h" 29 return llvm::make_error<llvm::StringError>(Msg,
30 llvm::inconvertibleErrorCode());
39 case index::SymbolRole::RelationBaseOf:
42 llvm_unreachable(
"Unsupported symbol role");
49 return index::SymbolRole::RelationBaseOf;
51 llvm_unreachable(
"Invalid relation kind");
67 const char *Begin, *End;
// Builds a reader over Data: Begin and End are set to Data's begin() and
// end(), so the reader only views the bytes and does not copy them.
71 Reader(llvm::StringRef Data) : Begin(Data.begin()), End(Data.end()) {}
// Returns the reader's Err flag unchanged (the flag's declaration is not part
// of this excerpt). Callers such as readRIFF test it after a read loop.
74 bool err()
const {
return Err; }
// True when the cursor has reached End, or when the Err flag is set. Loops of
// the form `while (!R.eof())` therefore stop in either case.
76 bool eof()
const {
return Begin == End || Err; }
// Returns the bytes from Begin up to End as a StringRef of length
// End - Begin. The cursor is not moved (the method is const).
78 llvm::StringRef rest()
const {
return llvm::StringRef(Begin, End - Begin); }
81 if (LLVM_UNLIKELY(Begin == End)) {
// Reads a 32-bit little-endian value located at Begin.
88 uint32_t consume32() {
// Guard for fewer than 4 bytes remaining between Begin and End.
// NOTE(review): the body of this branch is not visible in this excerpt.
89 if (LLVM_UNLIKELY(Begin + 4 > End)) {
// Decode the 4 bytes at Begin as little-endian.
93 auto Ret = llvm::support::endian::read32le(Begin);
// Returns a StringRef covering the N bytes that start at Begin.
// NOTE(review): N is a signed int and the visible guard only rejects reads
// past End; a negative N is not rejected by `Begin + N > End` — confirm that
// callers never pass one.
98 llvm::StringRef
consume(
int N) {
// Guard: the requested N bytes would run past End.
99 if (LLVM_UNLIKELY(Begin + N > End)) {
// Presumably still inside the guard (line 100 is not shown): an empty
// StringRef is returned on failure.
101 return llvm::StringRef();
103 llvm::StringRef Ret(Begin, N);
// Decodes a variable-length unsigned 32-bit integer. Each byte contributes its
// low 7 bits; the top bit (More) flags that another byte follows.
108 uint32_t consumeVar() {
109 constexpr
static uint8_t More = 1 << 7;
110 uint8_t B = consume8();
// Likely fast path: the first byte has no continuation bit.
111 if (LLVM_LIKELY(!(B & More)))
// Start from the low 7 bits of the first byte.
113 uint32_t Val = B & ~More;
// Later bytes are OR-ed in 7 bits higher each time. The loop ends when the
// continuation bit is clear or once Shift reaches 32.
114 for (
int Shift = 7; B & More && Shift < 32; Shift += 7) {
116 Val |= (B & ~More) << Shift;
// Reads a varint index and resolves it against the Strings table.
121 llvm::StringRef consumeString(llvm::ArrayRef<llvm::StringRef>
Strings) {
122 auto StringIndex = consumeVar();
// An index at or beyond Strings.size() is invalid input.
123 if (LLVM_UNLIKELY(StringIndex >= Strings.size())) {
// Presumably still inside the guard (line 124 is not shown): an empty
// StringRef is returned for a bad index.
125 return llvm::StringRef();
127 return Strings[StringIndex];
// Writes I to OS as 4 bytes in little-endian order, staged through the local
// buffer Buf (its declaration, line 137, is not part of this excerpt).
136 void write32(uint32_t I, llvm::raw_ostream &OS) {
138 llvm::support::endian::write32le(Buf, I);
139 OS.write(Buf,
sizeof(Buf));
// Writes I to OS in a variable-length form. More is the same top-bit constant
// (1 << 7) that Reader::consumeVar tests as its continuation flag.
// NOTE(review): only the start of this function is visible in this excerpt.
142 void writeVar(uint32_t I, llvm::raw_ostream &OS) {
143 constexpr
static uint8_t More = 1 << 7;
// Likely fast path: values below 1 << 7 (branch body not shown).
144 if (LLVM_LIKELY(I < 1 << 7)) {
// Write-side string table: strings are first collected with intern(), then
// finalize() writes the table to a stream, after which index() maps an
// interned string to its position.
172 class StringTableOut {
// The distinct strings handed to intern().
173 llvm::DenseSet<llvm::StringRef> Unique;
// Filled from Unique by finalize(); a string's position here is its index.
174 std::vector<llvm::StringRef> Sorted;
// Maps (data pointer, length) of each entry in Sorted to its position. The key
// is the pointer, not the contents, so lookups only succeed for StringRefs
// that point at the same storage as the entry in Sorted.
176 llvm::DenseMap<std::pair<const char *, size_t>,
unsigned>
Index;
// Inserts S into Unique if absent and rewrites S to the stored element, so
// equal strings end up referring to one shared StringRef.
185 void intern(llvm::StringRef &S) { S = *Unique.insert(S).first; };
// Freezes the table and writes it to OS.
187 void finalize(llvm::raw_ostream &OS) {
188 Sorted = {Unique.begin(), Unique.end()};
// NOTE(review): the member name suggests the vector is ordered on the line
// omitted here (189) — confirm against the full file.
// Record each string's position, keyed by its pointer and length.
190 for (
unsigned I = 0; I < Sorted.size(); ++I)
191 Index.try_emplace({Sorted[I].data(), Sorted[I].size()}, I);
// Build the raw table. A 0 byte is appended per string; the line that appends
// the string's own bytes (195) is not shown in this excerpt.
193 std::string RawTable;
194 for (llvm::StringRef S : Sorted) {
196 RawTable.push_back(0);
// With zlib available: compress RawTable and write its uncompressed size as a
// 32-bit prefix. cantFail treats a compression error as a fatal error.
198 if (llvm::zlib::isAvailable()) {
199 llvm::SmallString<1> Compressed;
200 llvm::cantFail(llvm::zlib::compress(RawTable, Compressed));
201 write32(RawTable.size(), OS);
// Returns the position of S in the finalized table. S must be a StringRef
// obtained through intern(), because the lookup is keyed on S.data().
209 unsigned index(llvm::StringRef S)
const {
210 assert(!Sorted.empty() &&
"table not finalized");
211 assert(Index.count({S.data(), S.size()}) &&
"string not interned");
212 return Index.find({S.data(), S.size()})->second;
216 struct StringTableIn {
// Parses a serialized string table from Data into a StringTableIn, or returns
// an error for truncated or malformed input.
221 llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) {
// Leading 32-bit field: the size of the table once uncompressed.
223 size_t UncompressedSize = R.consume32();
225 return makeError(
"Truncated string table");
227 llvm::StringRef Uncompressed;
// Holds the inflated bytes when the payload is compressed.
228 llvm::SmallString<1> UncompressedStorage;
// A size of 0 marks a payload stored uncompressed: use the remaining bytes.
229 if (UncompressedSize == 0)
230 Uncompressed = R.rest();
// Compressed case (presumably the `else` branch; line 231 is not shown):
// inflate the remaining bytes into UncompressedStorage.
232 if (
llvm::Error E = llvm::zlib::uncompress(R.rest(), UncompressedStorage,
235 Uncompressed = UncompressedStorage;
// Saver copies each string into Table.Arena.
239 llvm::StringSaver Saver(Table.Arena);
// NOTE(review): the loop below declares its own `Reader R` over the same
// bytes, shadowing this one, so this assignment looks redundant — confirm.
240 R = Reader(Uncompressed);
// Split the payload on 0 bytes; each string must be terminated by one.
241 for (Reader R(Uncompressed); !R.eof();) {
242 auto Len = R.rest().find(0);
243 if (Len == llvm::StringRef::npos)
244 return makeError(
"Bad string table: not null terminated");
245 Table.Strings.push_back(Saver.save(R.consume(Len)));
249 return makeError(
"Truncated string table");
250 return std::move(Table);
// Serializes Loc to OS as varints: the string-table index of FileURI, then
// line and column for Start, then line and column for End.
// readLocation consumes the fields in this same order.
259 void writeLocation(
const SymbolLocation &
Loc,
const StringTableOut &
Strings,
260 llvm::raw_ostream &OS) {
261 writeVar(Strings.index(Loc.FileURI), OS);
262 for (
const auto &Endpoint : {Loc.Start, Loc.End}) {
263 writeVar(Endpoint.line(), OS);
264 writeVar(Endpoint.column(), OS);
// Reads a SymbolLocation from Data in the order written by writeLocation:
// file URI string index, then line/column for Start and for End.
268 SymbolLocation readLocation(Reader &Data,
269 llvm::ArrayRef<llvm::StringRef> Strings) {
// Only the data pointer of the table entry is kept.
// NOTE(review): presumably relies on table strings being null-terminated and
// outliving the location — confirm against the string table's storage.
271 Loc.FileURI = Data.consumeString(Strings).data();
272 for (
auto *Endpoint : {&Loc.Start, &Loc.End}) {
273 Endpoint->setLine(Data.consumeVar());
274 Endpoint->setColumn(Data.consumeVar());
// Reads one IncludeGraphNode from Data: URI, digest bytes, then a
// count-prefixed list of direct includes. Line 282 is not shown in this
// excerpt; writeIncludeGraphNode writes a Flags byte first.
279 IncludeGraphNode readIncludeGraphNode(Reader &Data,
280 llvm::ArrayRef<llvm::StringRef> Strings) {
281 IncludeGraphNode IGN;
283 IGN.URI = Data.consumeString(Strings);
// The digest is stored as raw bytes, exactly IGN.Digest.size() of them.
284 llvm::StringRef Digest = Data.consume(IGN.Digest.size());
285 std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin());
// NOTE(review): the element count comes straight from the input stream.
286 IGN.DirectIncludes.resize(Data.consumeVar());
287 for (llvm::StringRef &Include : IGN.DirectIncludes)
288 Include = Data.consumeString(Strings);
// Serializes IGN to OS: Flags as one byte, the URI's string index, the digest,
// then a varint count followed by the string index of each direct include.
292 void writeIncludeGraphNode(
const IncludeGraphNode &IGN,
293 const StringTableOut &Strings,
294 llvm::raw_ostream &OS) {
295 OS.write(static_cast<uint8_t>(IGN.Flags));
296 writeVar(Strings.index(IGN.URI), OS);
// View the digest bytes as a StringRef. The rest of this statement and the
// write itself are on lines not shown in this excerpt.
297 llvm::StringRef Hash(reinterpret_cast<const char *>(IGN.Digest.data()),
300 writeVar(IGN.DirectIncludes.size(), OS);
301 for (llvm::StringRef Include : IGN.DirectIncludes)
302 writeVar(Strings.index(Include), OS);
// Serializes Sym to OS. Strings are written as varint indices into the string
// table, so each must already be interned in Strings. readSymbol consumes the
// visible fields in this same order.
305 void writeSymbol(
const Symbol &Sym,
const StringTableOut &Strings,
306 llvm::raw_ostream &OS) {
// Kind and language are each narrowed to a single byte.
309 OS.write(static_cast<uint8_t>(Sym.SymInfo.Kind));
310 OS.write(static_cast<uint8_t>(Sym.SymInfo.Lang));
311 writeVar(Strings.index(Sym.Name), OS);
312 writeVar(Strings.index(Sym.Scope), OS);
313 writeVar(Strings.index(Sym.TemplateSpecializationArgs), OS);
314 writeLocation(Sym.Definition, Strings, OS);
315 writeLocation(Sym.CanonicalDeclaration, Strings, OS);
316 writeVar(Sym.References, OS);
// Flags and origin are also one byte each.
317 OS.write(static_cast<uint8_t>(Sym.Flags));
318 OS.write(static_cast<uint8_t>(Sym.Origin));
319 writeVar(Strings.index(Sym.Signature), OS);
320 writeVar(Strings.index(Sym.CompletionSnippetSuffix), OS);
321 writeVar(Strings.index(Sym.Documentation), OS);
322 writeVar(Strings.index(Sym.ReturnType), OS);
323 writeVar(Strings.index(Sym.Type), OS);
// Writes one include header: its string index, then its reference count.
325 auto WriteInclude = [&](
const Symbol::IncludeHeaderWithReferences &Include) {
326 writeVar(Strings.index(Include.IncludeHeader), OS);
327 writeVar(Include.References, OS);
// The include-header list is prefixed with its element count.
329 writeVar(Sym.IncludeHeaders.size(), OS);
330 for (
const auto &Include : Sym.IncludeHeaders)
331 WriteInclude(Include);
// Reads one Symbol from Data, resolving string indices against Strings. The
// visible fields are consumed in the order writeSymbol emits them.
334 Symbol readSymbol(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
336 Sym.ID = Data.consumeID();
// Single-byte fields are cast back to their enum types.
338 Sym.SymInfo.Lang =
static_cast<index::SymbolLanguage
>(Data.consume8());
339 Sym.Name = Data.consumeString(Strings);
340 Sym.Scope = Data.consumeString(Strings);
341 Sym.TemplateSpecializationArgs = Data.consumeString(Strings);
342 Sym.Definition = readLocation(Data, Strings);
343 Sym.CanonicalDeclaration = readLocation(Data, Strings);
344 Sym.References = Data.consumeVar();
346 Sym.Origin =
static_cast<SymbolOrigin>(Data.consume8());
347 Sym.Signature = Data.consumeString(Strings);
348 Sym.CompletionSnippetSuffix = Data.consumeString(Strings);
349 Sym.Documentation = Data.consumeString(Strings);
350 Sym.ReturnType = Data.consumeString(Strings);
351 Sym.Type = Data.consumeString(Strings);
// Count-prefixed list of include headers.
// NOTE(review): the element count comes straight from the input stream.
352 Sym.IncludeHeaders.resize(Data.consumeVar());
353 for (
auto &I : Sym.IncludeHeaders) {
354 I.IncludeHeader = Data.consumeString(Strings);
355 I.References = Data.consumeVar();
// Serializes the references of one symbol to OS: a varint count, then for each
// Ref its kind as one byte followed by its location. Line 369 is not shown in
// this excerpt; readRefs reads a symbol ID first.
367 void writeRefs(
const SymbolID &ID, llvm::ArrayRef<Ref>
Refs,
368 const StringTableOut &Strings, llvm::raw_ostream &OS) {
370 writeVar(Refs.size(), OS);
371 for (
const auto &Ref : Refs) {
372 OS.write(static_cast<unsigned char>(Ref.Kind));
373 writeLocation(Ref.Location, Strings, OS);
// Reads one symbol's reference bundle from Data: the symbol ID, a varint
// count, then that many (kind byte, location) entries.
377 std::pair<SymbolID, std::vector<Ref>>
378 readRefs(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
379 std::pair<SymbolID, std::vector<Ref>>
Result;
380 Result.first = Data.consumeID();
// NOTE(review): the element count comes straight from the input stream, so a
// corrupt count sizes this vector before any entry is validated.
381 Result.second.resize(Data.consumeVar());
382 for (
auto &Ref : Result.second) {
383 Ref.Kind =
static_cast<RefKind>(Data.consume8());
384 Ref.Location = readLocation(Data, Strings);
// Serializes R to OS as: the subject's raw ID, a one-byte kind, the object's
// raw ID. Kind is computed on a line (398) not shown in this excerpt.
396 void writeRelation(
const Relation &R, llvm::raw_ostream &OS) {
397 OS << R.Subject.raw();
399 OS.write(static_cast<uint8_t>(Kind));
400 OS << R.Object.raw();
// Reads one Relation from Data, starting with the subject ID. The statements
// that produce Predicate and Object (lines 406-407) are only partly visible.
403 Relation readRelation(Reader &Data) {
404 SymbolID Subject = Data.consumeID();
405 index::SymbolRole Predicate =
408 return {Subject, Predicate, Object};
411 struct InternedCompileCommand {
// Serializes Cmd to CmdOS: the string index of the directory, a varint
// argument count, then the string index of each command-line argument.
// Every string must already be interned in Strings.
416 void writeCompileCommand(
const InternedCompileCommand &Cmd,
417 const StringTableOut &Strings,
418 llvm::raw_ostream &CmdOS) {
419 writeVar(Strings.index(Cmd.Directory), CmdOS);
420 writeVar(Cmd.CommandLine.size(), CmdOS);
421 for (llvm::StringRef C : Cmd.CommandLine)
422 writeVar(Strings.index(C), CmdOS);
// Reads a compile command in the order written by writeCompileCommand:
// directory, argument count, then each argument. CmdReader is taken by value,
// so the caller's reader is not advanced.
425 InternedCompileCommand
426 readCompileCommand(Reader CmdReader, llvm::ArrayRef<llvm::StringRef> Strings) {
427 InternedCompileCommand Cmd;
428 Cmd.Directory = CmdReader.consumeString(Strings);
// NOTE(review): the element count comes straight from the input stream.
429 Cmd.CommandLine.resize(CmdReader.consumeVar());
430 for (llvm::StringRef &C : Cmd.CommandLine)
431 C = CmdReader.consumeString(Strings);
// Format version of the RIFF index file. writeRIFF stores it as a 32-bit value
// in the "meta" chunk, and readRIFF compares the first 32-bit value of that
// chunk against it.
447 constexpr
static uint32_t Version = 12;
// Parses a RIFF-encoded index file from Data into an IndexFileIn. The "meta"
// and "stri" chunks are required; "srcs", "symb", "refs", "rela" and "cmdl"
// are each read only when present. Returns an error for a bad container, a
// missing required chunk, or a malformed section.
449 llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data) {
452 return RIFF.takeError();
// Index the chunks by their ID. try_emplace leaves an existing entry
// untouched, so the first chunk with a given ID is the one used.
455 llvm::StringMap<llvm::StringRef> Chunks;
456 for (
const auto &Chunk :
RIFF->Chunks)
457 Chunks.try_emplace(llvm::StringRef(Chunk.ID.data(), Chunk.ID.size()),
460 for (llvm::StringRef RequiredChunk : {
"meta",
"stri"})
461 if (!Chunks.count(RequiredChunk))
462 return makeError(
"missing required chunk " + RequiredChunk);
// The first 32-bit value of "meta" must equal Version.
464 Reader Meta(Chunks.lookup(
"meta"));
465 if (Meta.consume32() != Version)
// The string table is read first; later sections refer to it by index.
468 auto Strings = readStringTable(Chunks.lookup(
"stri"));
470 return Strings.takeError();
// Optional "srcs" chunk: include graph nodes, keyed by URI.
473 if (Chunks.count(
"srcs")) {
474 Reader SrcsReader(Chunks.lookup(
"srcs"));
475 Result.Sources.emplace();
476 while (!SrcsReader.eof()) {
477 auto IGN = readIncludeGraphNode(SrcsReader, Strings->Strings);
478 auto Entry = Result.Sources->try_emplace(IGN.URI).first;
479 Entry->getValue() = std::move(IGN);
// Re-point every direct include at the key stored in Result.Sources, creating
// an entry for the included URI if none exists yet.
483 for (
auto &Include :
Entry->getValue().DirectIncludes)
484 Include = Result.Sources->try_emplace(Include).first->getKey();
486 if (SrcsReader.err())
487 return makeError(
"malformed or truncated include uri");
// Optional "symb" chunk: symbols, read until the chunk is exhausted.
490 if (Chunks.count(
"symb")) {
491 Reader SymbolReader(Chunks.lookup(
"symb"));
493 while (!SymbolReader.eof())
494 Symbols.insert(readSymbol(SymbolReader, Strings->Strings));
495 if (SymbolReader.err())
496 return makeError(
"malformed or truncated symbol");
497 Result.Symbols = std::move(Symbols).build();
// Optional "refs" chunk: one bundle of references per symbol ID.
499 if (Chunks.count(
"refs")) {
500 Reader RefsReader(Chunks.lookup(
"refs"));
502 while (!RefsReader.eof()) {
503 auto RefsBundle = readRefs(RefsReader, Strings->Strings);
504 for (
const auto &Ref : RefsBundle.second)
505 Refs.insert(RefsBundle.first, Ref);
507 if (RefsReader.err())
508 return makeError(
"malformed or truncated refs");
509 Result.Refs = std::move(Refs).build();
// Optional "rela" chunk: relations between symbols.
511 if (Chunks.count(
"rela")) {
512 Reader RelationsReader(Chunks.lookup(
"rela"));
514 while (!RelationsReader.eof()) {
515 auto Relation = readRelation(RelationsReader);
516 Relations.insert(Relation);
518 if (RelationsReader.err())
519 return makeError(
"malformed or truncated relations");
520 Result.Relations = std::move(Relations).build();
// Optional "cmdl" chunk: the compile command. The condition guarding the
// error return below (line 524) is not shown in this excerpt.
522 if (Chunks.count(
"cmdl")) {
523 Reader CmdReader(Chunks.lookup(
"cmdl"));
525 return makeError(
"malformed or truncated commandline section");
526 InternedCompileCommand Cmd =
527 readCompileCommand(CmdReader, Strings->Strings);
// Copy the interned StringRefs into the result's own command entry.
528 Result.Cmd.emplace();
529 Result.Cmd->Directory = Cmd.Directory;
530 Result.Cmd->CommandLine.reserve(Cmd.CommandLine.size());
531 for (llvm::StringRef C : Cmd.CommandLine)
532 Result.Cmd->CommandLine.emplace_back(C);
534 return std::move(Result);
537 template <
class Callback>
540 for (llvm::StringRef &Include : IGN.DirectIncludes)
// Writes Data to OS as a RIFF index file. It works in two phases. First every
// string referenced by the symbols, sources, refs and compile command is
// interned. Then the string table is finalized and each section is serialized
// into its own chunk. The interning has to come first because
// StringTableOut::index() asserts that the table is finalized and the string
// was interned.
544 void writeRIFF(
const IndexFileOut &Data, llvm::raw_ostream &OS) {
545 assert(Data.Symbols &&
"An index file without symbols makes no sense!");
// Meta section: holds the format Version as a 32-bit value.
549 llvm::SmallString<4> Meta;
551 llvm::raw_svector_ostream MetaOS(Meta);
552 write32(Version, MetaOS);
// Copy the symbols locally so their strings can be replaced by interned ones.
// The call that receives the callback below (line 560) is not shown.
558 for (
const auto &Sym : *Data.Symbols) {
559 Symbols.emplace_back(Sym);
561 [&](llvm::StringRef &S) { Strings.intern(S); });
// Same treatment for the include graph nodes.
563 std::vector<IncludeGraphNode> Sources;
565 for (
const auto &Source : *Data.Sources) {
566 Sources.push_back(Source.getValue());
568 [&](llvm::StringRef &S) { Strings.intern(S); });
571 std::vector<std::pair<SymbolID, std::vector<Ref>>>
Refs;
// For refs, only the file URI is a string: intern it and store the interned
// pointer back into the copied Ref.
573 for (
const auto &Sym : *Data.Refs) {
574 Refs.emplace_back(Sym);
575 for (
auto &Ref : Refs.back().second) {
576 llvm::StringRef
File = Ref.Location.FileURI;
577 Strings.intern(File);
578 Ref.Location.FileURI = File.data();
// Relations are copied as-is; no strings are interned for them here.
584 if (Data.Relations) {
585 for (
const auto &Relation : *Data.Relations) {
586 Relations.emplace_back(Relation);
// Compile command: intern the directory and every argument.
591 InternedCompileCommand InternedCmd;
593 InternedCmd.CommandLine.reserve(Data.Cmd->CommandLine.size());
594 InternedCmd.Directory = Data.Cmd->Directory;
595 Strings.intern(InternedCmd.Directory);
596 for (llvm::StringRef C : Data.Cmd->CommandLine) {
597 InternedCmd.CommandLine.emplace_back(C);
598 Strings.intern(InternedCmd.CommandLine.back());
// All strings are interned by now: finalize the table into the "stri" chunk.
602 std::string StringSection;
604 llvm::raw_string_ostream StringOS(StringSection);
605 Strings.finalize(StringOS);
607 RIFF.Chunks.push_back({
riff::fourCC(
"stri"), StringSection});
// Each remaining section is serialized into its own std::string, which is
// then added as a chunk.
609 std::string SymbolSection;
611 llvm::raw_string_ostream SymbolOS(SymbolSection);
612 for (
const auto &Sym : Symbols)
613 writeSymbol(Sym, Strings, SymbolOS);
615 RIFF.Chunks.push_back({
riff::fourCC(
"symb"), SymbolSection});
617 std::string RefsSection;
620 llvm::raw_string_ostream RefsOS(RefsSection);
621 for (
const auto &Sym : Refs)
622 writeRefs(Sym.first, Sym.second, Strings, RefsOS);
624 RIFF.Chunks.push_back({
riff::fourCC(
"refs"), RefsSection});
627 std::string RelationSection;
628 if (Data.Relations) {
630 llvm::raw_string_ostream RelationOS{RelationSection};
631 for (
const auto &Relation : Relations)
632 writeRelation(Relation, RelationOS);
634 RIFF.Chunks.push_back({
riff::fourCC(
"rela"), RelationSection});
637 std::string SrcsSection;
640 llvm::raw_string_ostream SrcsOS(SrcsSection);
641 for (
const auto &SF : Sources)
642 writeIncludeGraphNode(SF, Strings, SrcsOS);
644 RIFF.Chunks.push_back({
riff::fourCC(
"srcs"), SrcsSection});
647 std::string CmdlSection;
650 llvm::raw_string_ostream CmdOS(CmdlSection);
651 writeCompileCommand(InternedCmd, Strings, CmdOS);
653 RIFF.Chunks.push_back({
riff::fourCC(
"cmdl"), CmdlSection});
// Declarations of the YAML writer and reader for index files. Their
// definitions are not part of this excerpt. readYAML is used further down as
// the fallback for input that does not start with "RIFF".
662 void writeYAML(
const IndexFileOut &, llvm::raw_ostream &);
663 llvm::Expected<IndexFileIn>
readYAML(llvm::StringRef);
// Format detection: input that starts with the bytes "RIFF" is parsed as a
// RIFF index file; anything else is tried as YAML.
// NOTE(review): the enclosing function header is not visible in this excerpt.
678 if (Data.startswith(
"RIFF")) {
679 return readRIFF(Data);
680 }
else if (
auto YAMLContents =
readYAML(Data)) {
681 return std::move(*YAMLContents);
// Neither format matched: report an error whose message is built from this
// prefix (the rest of the expression is on a line not shown).
683 return makeError(
"Not a RIFF file and failed to parse as YAML: " +
// Loads the index file SymbolFilename and builds a SymbolIndex from its
// symbols, refs and relations. Failures to open or parse the file are logged
// with elog.
// NOTE(review): several lines of this function, including the rest of the
// parameter list, are not visible in this excerpt.
688 std::unique_ptr<SymbolIndex>
loadIndex(llvm::StringRef SymbolFilename,
691 auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename);
693 elog(
"Can't open {0}", SymbolFilename);
// Take ownership of whichever slabs the parsed file provides.
704 Symbols = std::move(*I->Symbols);
706 Refs = std::move(*I->Refs);
708 Relations = std::move(*I->Relations);
710 elog(
"Bad Index: {0}", I.takeError());
// Capture the counts now: the slabs are moved from below, after which their
// sizes can no longer be queried reliably.
715 size_t NumSym = Symbols.
size();
716 size_t NumRefs = Refs.
numRefs();
717 size_t NumRelations = Relations.
size();
721 std::move(Relations))
723 std::move(Relations));
// Verbose-log a summary of the loaded index: which backend was built, the
// source file, the estimated memory usage, and the symbol/ref/relation counts.
// Fixes the misspelling "numnber" in the relations line of the message.
724 vlog(
"Loaded {0} from {1} with estimated memory usage {2} bytes\n" 725 " - number of symbols: {3}\n" 726 " - number of refs: {4}\n" 727 " - number of relations: {5}",
728 UseDex ?
"Dex" :
"MemIndex", SymbolFilename,
729 Index->estimateMemoryUsage(), NumSym, NumRefs, NumRelations);
SourceLocation Loc
'#' location in the include directive
llvm::Expected< IndexFileIn > readYAML(llvm::StringRef)
An immutable symbol container that stores a set of symbols.
static SymbolID fromRaw(llvm::StringRef)
std::unique_ptr< SymbolIndex > loadIndex(llvm::StringRef SymbolFilename, bool UseDex)
An efficient structure for storing a large set of symbol references in memory.
static llvm::Error makeError(const char *Msg)
This defines Dex - a symbol index implementation based on query iterators over symbol tokens...
void writeYAML(const IndexFileOut &, llvm::raw_ostream &)
RelationKind symbolRoleToRelationKind(index::SymbolRole Role)
clang::find_all_symbols::SymbolInfo::SymbolKind SymbolKind
constexpr FourCC fourCC(const char(&Literal)[5])
static llvm::StringRef toString(SpecialMemberFunctionsCheck::SpecialMemberFunctionKind K)
llvm::unique_function< void(llvm::Expected< T >)> Callback
A Callback<T> is a void function that accepts Expected<T>.
llvm::Expected< File > readFile(llvm::StringRef Stream)
llvm::Expected< IndexFileIn > readIndexFile(llvm::StringRef Data)
void vlog(const char *Fmt, Ts &&... Vals)
void elog(const char *Fmt, Ts &&... Vals)
std::vector< llvm::StringRef > CommandLine
static constexpr size_t RawSize
llvm::BumpPtrAllocator Arena
std::vector< std::pair< DocID, float > > consume(Iterator &It)
Advances the iterator until it is exhausted.
static std::unique_ptr< SymbolIndex > build(SymbolSlab Symbols, RefSlab Refs, RelationSlab Relations)
Builds an index from slabs. The index takes ownership of the data.
static std::unique_ptr< SymbolIndex > build(SymbolSlab, RefSlab, RelationSlab)
Builds an index from slabs. The index takes ownership of the slab.
index::SymbolRoleSet Role
RefKind
Describes the kind of a cross-reference.
llvm::StringRef Directory
std::vector< llvm::StringRef > Strings
CodeCompletionBuilder Builder
index::SymbolRole relationKindToSymbolRole(RelationKind Kind)
//===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
llvm::Optional< llvm::Expected< tooling::AtomicChanges > > Result
llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const CodeCompletion &C)
Records an event whose duration is the lifetime of the Span object.
std::array< uint8_t, 20 > SymbolID
void visitStrings(Symbol &S, const Callback &CB)
Invokes Callback with each StringRef& contained in the Symbol.
const SymbolIndex * Index