17 #include "clang/Tooling/CompilationDatabase.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/Support/Compression.h" 20 #include "llvm/Support/Endian.h" 21 #include "llvm/Support/Error.h" 22 #include "llvm/Support/raw_ostream.h" 29 return llvm::make_error<llvm::StringError>(Msg,
30 llvm::inconvertibleErrorCode());
const char *Begin; // Start of the bytes that have not been read yet.
const char *End;   // One past the last byte of the input.
48 Reader(llvm::StringRef Data) : Begin(Data.begin()), End(Data.end()) {}
51 bool err()
const {
return Err; }
53 bool eof()
const {
return Begin == End || Err; }
55 llvm::StringRef rest()
const {
return llvm::StringRef(Begin, End - Begin); }
58 if (LLVM_UNLIKELY(Begin == End)) {
65 uint32_t consume32() {
66 if (LLVM_UNLIKELY(Begin + 4 > End)) {
70 auto Ret = llvm::support::endian::read32le(Begin);
75 llvm::StringRef
consume(
int N) {
76 if (LLVM_UNLIKELY(Begin + N > End)) {
78 return llvm::StringRef();
80 llvm::StringRef Ret(Begin, N);
85 uint32_t consumeVar() {
86 constexpr
static uint8_t More = 1 << 7;
87 uint8_t B = consume8();
88 if (LLVM_LIKELY(!(B & More)))
90 uint32_t Val = B & ~More;
91 for (
int Shift = 7; B & More && Shift < 32; Shift += 7) {
93 Val |= (B & ~More) << Shift;
98 llvm::StringRef consumeString(llvm::ArrayRef<llvm::StringRef>
Strings) {
99 auto StringIndex = consumeVar();
100 if (LLVM_UNLIKELY(StringIndex >= Strings.size())) {
102 return llvm::StringRef();
104 return Strings[StringIndex];
113 void write32(uint32_t I, llvm::raw_ostream &OS) {
115 llvm::support::endian::write32le(Buf, I);
116 OS.write(Buf,
sizeof(Buf));
119 void writeVar(uint32_t I, llvm::raw_ostream &OS) {
120 constexpr
static uint8_t More = 1 << 7;
121 if (LLVM_LIKELY(I < 1 << 7)) {
149 class StringTableOut {
150 llvm::DenseSet<llvm::StringRef> Unique;
151 std::vector<llvm::StringRef> Sorted;
153 llvm::DenseMap<std::pair<const char *, size_t>,
unsigned>
Index;
162 void intern(llvm::StringRef &S) { S = *Unique.insert(S).first; };
164 void finalize(llvm::raw_ostream &OS) {
165 Sorted = {Unique.begin(), Unique.end()};
167 for (
unsigned I = 0; I < Sorted.size(); ++I)
168 Index.try_emplace({Sorted[I].data(), Sorted[I].size()}, I);
170 std::string RawTable;
171 for (llvm::StringRef S : Sorted) {
173 RawTable.push_back(0);
175 if (llvm::zlib::isAvailable()) {
176 llvm::SmallString<1> Compressed;
177 llvm::cantFail(llvm::zlib::compress(RawTable, Compressed));
178 write32(RawTable.size(), OS);
186 unsigned index(llvm::StringRef S)
const {
187 assert(!Sorted.empty() &&
"table not finalized");
188 assert(Index.count({S.data(), S.size()}) &&
"string not interned");
189 return Index.find({S.data(), S.size()})->second;
193 struct StringTableIn {
198 llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) {
200 size_t UncompressedSize = R.consume32();
202 return makeError(
"Truncated string table");
204 llvm::StringRef Uncompressed;
205 llvm::SmallString<1> UncompressedStorage;
206 if (UncompressedSize == 0)
207 Uncompressed = R.rest();
209 if (
llvm::Error E = llvm::zlib::uncompress(R.rest(), UncompressedStorage,
212 Uncompressed = UncompressedStorage;
216 llvm::StringSaver Saver(Table.Arena);
217 R = Reader(Uncompressed);
218 for (Reader R(Uncompressed); !R.eof();) {
219 auto Len = R.rest().find(0);
220 if (Len == llvm::StringRef::npos)
221 return makeError(
"Bad string table: not null terminated");
222 Table.Strings.push_back(Saver.save(R.consume(Len)));
226 return makeError(
"Truncated string table");
227 return std::move(Table);
236 void writeLocation(
const SymbolLocation &
Loc,
const StringTableOut &Strings,
237 llvm::raw_ostream &OS) {
238 writeVar(Strings.index(Loc.FileURI), OS);
239 for (
const auto &Endpoint : {Loc.Start, Loc.End}) {
240 writeVar(Endpoint.line(), OS);
241 writeVar(Endpoint.column(), OS);
245 SymbolLocation readLocation(Reader &Data,
246 llvm::ArrayRef<llvm::StringRef> Strings) {
248 Loc.FileURI = Data.consumeString(Strings).data();
249 for (
auto *Endpoint : {&Loc.Start, &Loc.End}) {
250 Endpoint->setLine(Data.consumeVar());
251 Endpoint->setColumn(Data.consumeVar());
256 IncludeGraphNode readIncludeGraphNode(Reader &Data,
257 llvm::ArrayRef<llvm::StringRef> Strings) {
258 IncludeGraphNode IGN;
260 IGN.URI = Data.consumeString(Strings);
261 llvm::StringRef Digest = Data.consume(IGN.Digest.size());
262 std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin());
263 IGN.DirectIncludes.resize(Data.consumeVar());
264 for (llvm::StringRef &Include : IGN.DirectIncludes)
265 Include = Data.consumeString(Strings);
269 void writeIncludeGraphNode(
const IncludeGraphNode &IGN,
270 const StringTableOut &Strings,
271 llvm::raw_ostream &OS) {
272 OS.write(static_cast<uint8_t>(IGN.Flags));
273 writeVar(Strings.index(IGN.URI), OS);
274 llvm::StringRef Hash(reinterpret_cast<const char *>(IGN.Digest.data()),
277 writeVar(IGN.DirectIncludes.size(), OS);
278 for (llvm::StringRef Include : IGN.DirectIncludes)
279 writeVar(Strings.index(Include), OS);
282 void writeSymbol(
const Symbol &Sym,
const StringTableOut &Strings,
283 llvm::raw_ostream &OS) {
286 OS.write(static_cast<uint8_t>(Sym.SymInfo.Kind));
287 OS.write(static_cast<uint8_t>(Sym.SymInfo.Lang));
288 writeVar(Strings.index(Sym.Name), OS);
289 writeVar(Strings.index(Sym.Scope), OS);
290 writeVar(Strings.index(Sym.TemplateSpecializationArgs), OS);
291 writeLocation(Sym.Definition, Strings, OS);
292 writeLocation(Sym.CanonicalDeclaration, Strings, OS);
293 writeVar(Sym.References, OS);
294 OS.write(static_cast<uint8_t>(Sym.Flags));
295 OS.write(static_cast<uint8_t>(Sym.Origin));
296 writeVar(Strings.index(Sym.Signature), OS);
297 writeVar(Strings.index(Sym.CompletionSnippetSuffix), OS);
298 writeVar(Strings.index(Sym.Documentation), OS);
299 writeVar(Strings.index(Sym.ReturnType), OS);
300 writeVar(Strings.index(Sym.Type), OS);
302 auto WriteInclude = [&](
const Symbol::IncludeHeaderWithReferences &Include) {
303 writeVar(Strings.index(Include.IncludeHeader), OS);
304 writeVar(Include.References, OS);
306 writeVar(Sym.IncludeHeaders.size(), OS);
307 for (
const auto &Include : Sym.IncludeHeaders)
308 WriteInclude(Include);
311 Symbol readSymbol(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
313 Sym.ID = Data.consumeID();
315 Sym.SymInfo.Lang =
static_cast<index::SymbolLanguage
>(Data.consume8());
316 Sym.Name = Data.consumeString(Strings);
317 Sym.Scope = Data.consumeString(Strings);
318 Sym.TemplateSpecializationArgs = Data.consumeString(Strings);
319 Sym.Definition = readLocation(Data, Strings);
320 Sym.CanonicalDeclaration = readLocation(Data, Strings);
321 Sym.References = Data.consumeVar();
323 Sym.Origin =
static_cast<SymbolOrigin>(Data.consume8());
324 Sym.Signature = Data.consumeString(Strings);
325 Sym.CompletionSnippetSuffix = Data.consumeString(Strings);
326 Sym.Documentation = Data.consumeString(Strings);
327 Sym.ReturnType = Data.consumeString(Strings);
328 Sym.Type = Data.consumeString(Strings);
329 Sym.IncludeHeaders.resize(Data.consumeVar());
330 for (
auto &I : Sym.IncludeHeaders) {
331 I.IncludeHeader = Data.consumeString(Strings);
332 I.References = Data.consumeVar();
344 void writeRefs(
const SymbolID &ID, llvm::ArrayRef<Ref>
Refs,
345 const StringTableOut &Strings, llvm::raw_ostream &OS) {
347 writeVar(Refs.size(), OS);
348 for (
const auto &Ref : Refs) {
349 OS.write(static_cast<unsigned char>(Ref.Kind));
350 writeLocation(Ref.Location, Strings, OS);
354 std::pair<SymbolID, std::vector<Ref>>
355 readRefs(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
356 std::pair<SymbolID, std::vector<Ref>> Result;
357 Result.first = Data.consumeID();
358 Result.second.resize(Data.consumeVar());
359 for (
auto &Ref : Result.second) {
360 Ref.Kind =
static_cast<RefKind>(Data.consume8());
361 Ref.Location = readLocation(Data, Strings);
373 void writeRelation(
const Relation &R, llvm::raw_ostream &OS) {
374 OS << R.Subject.raw();
375 OS.write(static_cast<uint8_t>(R.Predicate));
376 OS << R.Object.raw();
379 Relation readRelation(Reader &Data) {
380 SymbolID Subject = Data.consumeID();
383 return {Subject, Predicate, Object};
386 struct InternedCompileCommand {
391 void writeCompileCommand(
const InternedCompileCommand &Cmd,
392 const StringTableOut &Strings,
393 llvm::raw_ostream &CmdOS) {
394 writeVar(Strings.index(Cmd.Directory), CmdOS);
395 writeVar(Cmd.CommandLine.size(), CmdOS);
396 for (llvm::StringRef C : Cmd.CommandLine)
397 writeVar(Strings.index(C), CmdOS);
400 InternedCompileCommand
401 readCompileCommand(Reader CmdReader, llvm::ArrayRef<llvm::StringRef> Strings) {
402 InternedCompileCommand Cmd;
403 Cmd.Directory = CmdReader.consumeString(Strings);
404 Cmd.CommandLine.resize(CmdReader.consumeVar());
405 for (llvm::StringRef &C : Cmd.CommandLine)
406 C = CmdReader.consumeString(Strings);
// Format version. The writer emits it with write32 into the Meta buffer, and
// the reader rejects a file whose "meta" chunk does not start with this value.
static constexpr uint32_t Version = 12;
424 llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data) {
427 return RIFF.takeError();
430 llvm::StringMap<llvm::StringRef> Chunks;
431 for (
const auto &Chunk :
RIFF->Chunks)
432 Chunks.try_emplace(llvm::StringRef(Chunk.ID.data(), Chunk.ID.size()),
435 for (llvm::StringRef RequiredChunk : {
"meta",
"stri"})
436 if (!Chunks.count(RequiredChunk))
437 return makeError(
"missing required chunk " + RequiredChunk);
439 Reader Meta(Chunks.lookup(
"meta"));
440 if (Meta.consume32() != Version)
443 auto Strings = readStringTable(Chunks.lookup(
"stri"));
445 return Strings.takeError();
448 if (Chunks.count(
"srcs")) {
449 Reader SrcsReader(Chunks.lookup(
"srcs"));
450 Result.Sources.emplace();
451 while (!SrcsReader.eof()) {
452 auto IGN = readIncludeGraphNode(SrcsReader, Strings->Strings);
453 auto Entry = Result.Sources->try_emplace(IGN.URI).first;
454 Entry->getValue() = std::move(IGN);
458 for (
auto &Include :
Entry->getValue().DirectIncludes)
459 Include = Result.Sources->try_emplace(Include).first->getKey();
461 if (SrcsReader.err())
462 return makeError(
"malformed or truncated include uri");
465 if (Chunks.count(
"symb")) {
466 Reader SymbolReader(Chunks.lookup(
"symb"));
468 while (!SymbolReader.eof())
469 Symbols.insert(readSymbol(SymbolReader, Strings->Strings));
470 if (SymbolReader.err())
471 return makeError(
"malformed or truncated symbol");
472 Result.Symbols = std::move(Symbols).build();
474 if (Chunks.count(
"refs")) {
475 Reader RefsReader(Chunks.lookup(
"refs"));
477 while (!RefsReader.eof()) {
478 auto RefsBundle = readRefs(RefsReader, Strings->Strings);
479 for (
const auto &Ref : RefsBundle.second)
480 Refs.insert(RefsBundle.first, Ref);
482 if (RefsReader.err())
483 return makeError(
"malformed or truncated refs");
484 Result.Refs = std::move(Refs).build();
486 if (Chunks.count(
"rela")) {
487 Reader RelationsReader(Chunks.lookup(
"rela"));
489 while (!RelationsReader.eof()) {
490 auto Relation = readRelation(RelationsReader);
491 Relations.insert(Relation);
493 if (RelationsReader.err())
494 return makeError(
"malformed or truncated relations");
495 Result.Relations = std::move(Relations).build();
497 if (Chunks.count(
"cmdl")) {
498 Reader CmdReader(Chunks.lookup(
"cmdl"));
500 return makeError(
"malformed or truncated commandline section");
501 InternedCompileCommand Cmd =
502 readCompileCommand(CmdReader, Strings->Strings);
503 Result.Cmd.emplace();
504 Result.Cmd->Directory = Cmd.Directory;
505 Result.Cmd->CommandLine.reserve(Cmd.CommandLine.size());
506 for (llvm::StringRef C : Cmd.CommandLine)
507 Result.Cmd->CommandLine.emplace_back(C);
509 return std::move(Result);
512 template <
class Callback>
515 for (llvm::StringRef &Include : IGN.DirectIncludes)
519 void writeRIFF(
const IndexFileOut &Data, llvm::raw_ostream &OS) {
520 assert(Data.Symbols &&
"An index file without symbols makes no sense!");
524 llvm::SmallString<4> Meta;
526 llvm::raw_svector_ostream MetaOS(Meta);
527 write32(Version, MetaOS);
533 for (
const auto &Sym : *Data.Symbols) {
534 Symbols.emplace_back(Sym);
536 [&](llvm::StringRef &S) { Strings.intern(S); });
538 std::vector<IncludeGraphNode> Sources;
540 for (
const auto &Source : *Data.Sources) {
541 Sources.push_back(Source.getValue());
543 [&](llvm::StringRef &S) { Strings.intern(S); });
546 std::vector<std::pair<SymbolID, std::vector<Ref>>>
Refs;
548 for (
const auto &Sym : *Data.Refs) {
549 Refs.emplace_back(Sym);
550 for (
auto &Ref : Refs.back().second) {
551 llvm::StringRef
File = Ref.Location.FileURI;
552 Strings.intern(File);
553 Ref.Location.FileURI = File.data();
558 std::vector<Relation> Relations;
559 if (Data.Relations) {
560 for (
const auto &Relation : *Data.Relations) {
561 Relations.emplace_back(Relation);
566 InternedCompileCommand InternedCmd;
568 InternedCmd.CommandLine.reserve(Data.Cmd->CommandLine.size());
569 InternedCmd.Directory = Data.Cmd->Directory;
570 Strings.intern(InternedCmd.Directory);
571 for (llvm::StringRef C : Data.Cmd->CommandLine) {
572 InternedCmd.CommandLine.emplace_back(C);
573 Strings.intern(InternedCmd.CommandLine.back());
577 std::string StringSection;
579 llvm::raw_string_ostream StringOS(StringSection);
580 Strings.finalize(StringOS);
582 RIFF.Chunks.push_back({
riff::fourCC(
"stri"), StringSection});
584 std::string SymbolSection;
586 llvm::raw_string_ostream SymbolOS(SymbolSection);
587 for (
const auto &Sym : Symbols)
588 writeSymbol(Sym, Strings, SymbolOS);
590 RIFF.Chunks.push_back({
riff::fourCC(
"symb"), SymbolSection});
592 std::string RefsSection;
595 llvm::raw_string_ostream RefsOS(RefsSection);
596 for (
const auto &Sym : Refs)
597 writeRefs(Sym.first, Sym.second, Strings, RefsOS);
599 RIFF.Chunks.push_back({
riff::fourCC(
"refs"), RefsSection});
602 std::string RelationSection;
603 if (Data.Relations) {
605 llvm::raw_string_ostream RelationOS{RelationSection};
606 for (
const auto &Relation : Relations)
607 writeRelation(Relation, RelationOS);
609 RIFF.Chunks.push_back({
riff::fourCC(
"rela"), RelationSection});
612 std::string SrcsSection;
615 llvm::raw_string_ostream SrcsOS(SrcsSection);
616 for (
const auto &SF : Sources)
617 writeIncludeGraphNode(SF, Strings, SrcsOS);
619 RIFF.Chunks.push_back({
riff::fourCC(
"srcs"), SrcsSection});
622 std::string CmdlSection;
625 llvm::raw_string_ostream CmdOS(CmdlSection);
626 writeCompileCommand(InternedCmd, Strings, CmdOS);
628 RIFF.Chunks.push_back({
riff::fourCC(
"cmdl"), CmdlSection});
637 void writeYAML(
const IndexFileOut &, llvm::raw_ostream &);
638 llvm::Expected<IndexFileIn>
readYAML(llvm::StringRef);
653 if (Data.startswith(
"RIFF")) {
654 return readRIFF(Data);
655 }
else if (
auto YAMLContents =
readYAML(Data)) {
656 return std::move(*YAMLContents);
658 return makeError(
"Not a RIFF file and failed to parse as YAML: " +
663 std::unique_ptr<SymbolIndex>
loadIndex(llvm::StringRef SymbolFilename,
666 auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename);
668 elog(
"Can't open {0}", SymbolFilename);
679 Symbols = std::move(*I->Symbols);
681 Refs = std::move(*I->Refs);
683 Relations = std::move(*I->Relations);
685 elog(
"Bad Index: {0}", I.takeError());
690 size_t NumSym = Symbols.
size();
691 size_t NumRefs = Refs.
numRefs();
692 size_t NumRelations = Relations.
size();
695 auto Index = UseDex ?
dex::Dex::build(std::move(Symbols), std::move(Refs),
696 std::move(Relations))
698 std::move(Relations));
699 vlog(
"Loaded {0} from {1} with estimated memory usage {2} bytes\n" 700 " - number of symbols: {3}\n" 701 " - number of refs: {4}\n" 702 " - numnber of relations: {5}",
703 UseDex ?
"Dex" :
"MemIndex", SymbolFilename,
704 Index->estimateMemoryUsage(), NumSym, NumRefs, NumRelations);
SourceLocation Loc
'#' location in the include directive
llvm::Expected< IndexFileIn > readYAML(llvm::StringRef)
An immutable symbol container that stores a set of symbols.
std::unique_ptr< SymbolIndex > loadIndex(llvm::StringRef SymbolFilename, bool UseDex)
An efficient structure of storing large set of symbol references in memory.
static llvm::Error makeError(const char *Msg)
This defines Dex - a symbol index implementation based on query iterators over symbol tokens...
void writeYAML(const IndexFileOut &, llvm::raw_ostream &)
clang::find_all_symbols::SymbolInfo::SymbolKind SymbolKind
constexpr FourCC fourCC(const char(&Literal)[5])
static llvm::StringRef toString(SpecialMemberFunctionsCheck::SpecialMemberFunctionKind K)
llvm::unique_function< void(llvm::Expected< T >)> Callback
A Callback<T> is a void function that accepts Expected<T>.
llvm::Expected< File > readFile(llvm::StringRef Stream)
llvm::Expected< IndexFileIn > readIndexFile(llvm::StringRef Data)
void vlog(const char *Fmt, Ts &&... Vals)
void elog(const char *Fmt, Ts &&... Vals)
std::vector< llvm::StringRef > CommandLine
static constexpr size_t RawSize
llvm::BumpPtrAllocator Arena
std::vector< std::pair< DocID, float > > consume(Iterator &It)
Advances the iterator until it is exhausted.
static std::unique_ptr< SymbolIndex > build(SymbolSlab Symbols, RefSlab Refs, RelationSlab Relations)
Builds an index from slabs. The index takes ownership of the data.
static std::unique_ptr< SymbolIndex > build(SymbolSlab, RefSlab, RelationSlab)
Builds an index from slabs. The index takes ownership of the slab.
RefKind
Describes the kind of a cross-reference.
llvm::StringRef Directory
std::vector< llvm::StringRef > Strings
CodeCompletionBuilder Builder
===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const CodeCompletion &C)
Records an event whose duration is the lifetime of the Span object.
std::array< uint8_t, 20 > SymbolID
void visitStrings(Symbol &S, const Callback &CB)
Invokes Callback with each StringRef& contained in the Symbol.
const SymbolIndex * Index