17 #include "clang/AST/ASTContext.h"
18 #include "clang/Basic/LangOptions.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "clang/Driver/Types.h"
23 #include "clang/Format/Format.h"
24 #include "clang/Lex/Lexer.h"
25 #include "clang/Lex/Preprocessor.h"
26 #include "clang/Lex/Token.h"
27 #include "clang/Tooling/Core/Replacement.h"
28 #include "clang/Tooling/Syntax/Tokens.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/None.h"
31 #include "llvm/ADT/STLExtras.h"
32 #include "llvm/ADT/StringExtras.h"
33 #include "llvm/ADT/StringMap.h"
34 #include "llvm/ADT/StringRef.h"
35 #include "llvm/Support/Compiler.h"
36 #include "llvm/Support/Errc.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/LineIterator.h"
40 #include "llvm/Support/MemoryBuffer.h"
41 #include "llvm/Support/Path.h"
42 #include "llvm/Support/SHA1.h"
43 #include "llvm/Support/VirtualFileSystem.h"
44 #include "llvm/Support/xxhash.h"
64 template <
typename Callback>
66 bool LoggedInvalid =
false;
69 for (
size_t I = 0; I < U8.size();) {
70 unsigned char C = static_cast<unsigned char>(U8[I]);
71 if (LLVM_LIKELY(!(C & 0x80))) {
78 size_t UTF8Length = llvm::countLeadingOnes(C);
81 if (LLVM_UNLIKELY(UTF8Length < 2 || UTF8Length > 4)) {
83 elog(
"File has invalid UTF-8 near offset {0}: {1}", I, llvm::toHex(U8));
97 if (CB(UTF8Length, UTF8Length == 4 ? 2 : 1))
134 llvm_unreachable(
"unsupported encoding");
137 if (Result > U8.size()) {
170 llvm_unreachable(
"unsupported encoding");
176 bool AllowColumnsBeyondLineLength) {
178 return llvm::make_error<llvm::StringError>(
179 llvm::formatv(
"Line value can't be negative ({0})", P.
line),
180 llvm::errc::invalid_argument);
182 return llvm::make_error<llvm::StringError>(
183 llvm::formatv(
"Character value can't be negative ({0})", P.
character),
184 llvm::errc::invalid_argument);
185 size_t StartOfLine = 0;
186 for (
int I = 0; I != P.
line; ++I) {
187 size_t NextNL =
Code.find(
'\n', StartOfLine);
188 if (NextNL == llvm::StringRef::npos)
189 return llvm::make_error<llvm::StringError>(
190 llvm::formatv(
"Line value is out of range ({0})", P.
line),
191 llvm::errc::invalid_argument);
192 StartOfLine = NextNL + 1;
195 Code.substr(StartOfLine).take_until([](
char C) {
return C ==
'\n'; });
200 if (!Valid && !AllowColumnsBeyondLineLength)
201 return llvm::make_error<llvm::StringError>(
202 llvm::formatv(
"{0} offset {1} is invalid for line {2}",
lspEncoding(),
204 llvm::errc::invalid_argument);
205 return StartOfLine + ByteInLine;
210 llvm::StringRef Before =
Code.substr(0,
Offset);
211 int Lines = Before.count(
'\n');
212 size_t PrevNL = Before.rfind(
'\n');
213 size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1);
224 std::tie(FID,
Offset) = SM.getDecomposedSpellingLoc(
Loc);
226 P.
line = static_cast<int>(SM.getLineNumber(FID,
Offset)) - 1;
227 bool Invalid =
false;
228 llvm::StringRef
Code = SM.getBufferData(FID, &Invalid);
230 auto ColumnInBytes = SM.getColumnNumber(FID,
Offset) - 1;
231 auto LineSoFar =
Code.substr(
Offset - ColumnInBytes, ColumnInBytes);
238 if (
Loc.isMacroID()) {
239 std::string PrintLoc = SM.getSpellingLoc(
Loc).printToString(SM);
240 if (llvm::StringRef(PrintLoc).startswith(
"<scratch") ||
241 llvm::StringRef(PrintLoc).startswith(
"<command line>"))
248 if (!R.getBegin().isValid() || !R.getEnd().isValid())
252 size_t BeginOffset = 0;
253 std::tie(BeginFID, BeginOffset) = Mgr.getDecomposedLoc(R.getBegin());
256 size_t EndOffset = 0;
257 std::tie(EndFID, EndOffset) = Mgr.getDecomposedLoc(R.getEnd());
259 return BeginFID.isValid() && BeginFID == EndFID && BeginOffset <= EndOffset;
263 assert(SM.getLocForEndOfFile(IncludedFile).isFileID());
264 FileID IncludingFile;
266 std::tie(IncludingFile,
Offset) =
267 SM.getDecomposedExpansionLoc(SM.getIncludeLoc(IncludedFile));
268 bool Invalid =
false;
269 llvm::StringRef Buf = SM.getBufferData(IncludingFile, &Invalid);
271 return SourceLocation();
275 assert(
Offset < Buf.size());
278 return SM.getComposedLoc(IncludingFile,
Offset);
280 return SourceLocation();
285 const LangOptions &LangOpts) {
287 if (Lexer::getRawToken(
Loc, TheTok, SM, LangOpts))
294 if (TheTok.is(tok::greatergreater))
296 return TheTok.getLength();
301 const SourceManager &SM,
302 const LangOptions &LangOpts) {
304 return BeginLoc.getLocWithOffset(Len ? Len - 1 : 0);
309 const SourceManager &SM,
310 const LangOptions &LangOpts) {
311 return EndLoc.getLocWithOffset(
317 const LangOptions &LangOpts) {
318 if (!
Range.isTokenRange())
320 return Range.getAsRange();
326 const SourceManager &SM,
327 const LangOptions &LangOpts) {
328 SourceLocation Begin =
329 SM.isBeforeInTranslationUnit(R1.getBegin(), R2.getBegin())
337 return SourceRange(Begin, End);
344 const LangOptions &LangOpts) {
346 if (SM.isWrittenInSameFile(R.getBegin(), R.getEnd()))
349 llvm::DenseMap<FileID, SourceLocation> BeginExpansions;
350 for (SourceLocation Begin = R.getBegin(); Begin.isValid();
351 Begin = Begin.isFileID()
353 : SM.getImmediateExpansionRange(Begin).getBegin()) {
354 BeginExpansions[SM.getFileID(Begin)] = Begin;
358 for (SourceLocation End = R.getEnd(); End.isValid();
363 auto It = BeginExpansions.find(SM.getFileID(End));
364 if (It != BeginExpansions.end()) {
365 if (SM.getFileOffset(It->second) > SM.getFileOffset(End))
366 return SourceLocation();
367 return {It->second, End};
370 return SourceRange();
377 const LangOptions &LangOpts) {
394 const SourceManager &SM,
395 const LangOptions &LangOpts) {
396 SourceRange FileRange =
Loc;
397 while (!FileRange.getBegin().isFileID()) {
398 if (SM.isMacroArgExpansion(FileRange.getBegin())) {
400 SM.getImmediateSpellingLoc(FileRange.getBegin()),
401 SM.getImmediateSpellingLoc(FileRange.getEnd()), SM, LangOpts);
402 assert(SM.isWrittenInSameFile(FileRange.getBegin(), FileRange.getEnd()));
404 SourceRange ExpansionRangeForBegin =
406 SourceRange ExpansionRangeForEnd =
408 if (ExpansionRangeForBegin.isInvalid() ||
409 ExpansionRangeForEnd.isInvalid())
410 return SourceRange();
411 assert(SM.isWrittenInSameFile(ExpansionRangeForBegin.getBegin(),
412 ExpansionRangeForEnd.getBegin()) &&
413 "Both Expansion ranges should be in same file.");
414 FileRange =
unionTokenRange(ExpansionRangeForBegin, ExpansionRangeForEnd,
424 FileID FID = SM.getFileID(SM.getExpansionLoc(
Loc));
425 return FID == SM.getMainFileID() || FID == SM.getPreambleFileID();
429 const LangOptions &LangOpts,
443 Result.setEnd(Result.getEnd().getLocWithOffset(TokLen));
452 bool Invalid =
false;
453 auto *Buf = SM.getBuffer(SM.getFileID(R.getBegin()), &Invalid);
456 size_t BeginOffset = SM.getFileOffset(R.getBegin());
457 size_t EndOffset = SM.getFileOffset(R.getEnd());
458 return Buf->getBuffer().substr(BeginOffset, EndOffset - BeginOffset);
463 llvm::StringRef
Code = SM.getBuffer(SM.getMainFileID())->
getBuffer();
467 return Offset.takeError();
468 return SM.getLocForStartOfFile(SM.getMainFileID()).getLocWithOffset(*
Offset);
482 llvm::StringRef Before =
Code.substr(0,
Offset);
483 int Lines = Before.count(
'\n');
484 size_t PrevNL = Before.rfind(
'\n');
485 size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1);
490 size_t Pos = QName.rfind(
"::");
491 if (
Pos == llvm::StringRef::npos)
492 return {llvm::StringRef(), QName};
493 return {QName.substr(0,
Pos + 2), QName.substr(
Pos + 2)};
497 const tooling::Replacement &R) {
498 Range ReplacementRange = {
501 return {ReplacementRange, std::string(R.getReplacementText())};
505 const tooling::Replacements &Repls) {
506 std::vector<TextEdit> Edits;
507 for (
const auto &R : Repls)
517 llvm::SmallString<128> FilePath = F->getName();
518 if (!llvm::sys::path::is_absolute(FilePath)) {
520 SourceMgr.getFileManager().getVirtualFileSystem().makeAbsolute(
522 elog(
"Could not turn relative path '{0}' to absolute: {1}", FilePath,
539 if (
auto Dir =
SourceMgr.getFileManager().getDirectory(
540 llvm::sys::path::parent_path(FilePath))) {
541 llvm::SmallString<128> RealPath;
542 llvm::StringRef DirName =
SourceMgr.getFileManager().getCanonicalName(*Dir);
543 llvm::sys::path::append(RealPath, DirName,
544 llvm::sys::path::filename(FilePath));
545 return RealPath.str().str();
548 return FilePath.str().str();
552 const LangOptions &L) {
556 Result.newText =
FixIt.CodeToInsert;
561 uint64_t Hash{llvm::xxHash64(Content)};
563 for (
unsigned I = 0; I < Result.size(); ++I) {
564 Result[I] = uint8_t(Hash);
// Looks up the buffer text of FID in SM; presumably that text is what gets
// digested into the returned FileDigest (the digest step is not visible here).
// `Invalid` is the out-flag handed to getBufferData; how it is checked is in
// lines not shown.
570 llvm::Optional<FileDigest>
digestFile(
const SourceManager &SM, FileID FID) {
571 bool Invalid =
false;
572 llvm::StringRef Content = SM.getBufferData(FID, &Invalid);
579 llvm::StringRef Content,
581 auto Style = format::getStyle(format::DefaultFormatStyle, File,
582 format::DefaultFallbackStyle, Content,
585 log(
"getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File,
587 return format::getLLVMStyle();
592 llvm::Expected<tooling::Replacements>
595 auto CleanReplaces = cleanupAroundReplacements(
Code, Replaces, Style);
597 return CleanReplaces;
598 return formatReplacements(
Code, std::move(*CleanReplaces), Style);
// Tokenizes Code with syntax::tokenize under LangOpts, using a local
// SourceManagerForFile built over a copy of the text. The final parameter is a
// callback taking (token, SourceManager); its name and the loop body that
// uses it are in lines not shown.
602 lex(llvm::StringRef
Code,
const LangOptions &LangOpts,
603 llvm::function_ref<
void(
const syntax::Token &,
const SourceManager &SM)>
// Copy the text into an owning std::string; the variable name indicates the
// copy exists to obtain a null-terminated buffer.
606 std::string NullTerminatedCode =
Code.str();
// "dummy.cpp" is presumably just the file name given to the in-memory buffer.
607 SourceManagerForFile FileSM(
"dummy.cpp", NullTerminatedCode);
608 auto &SM = FileSM.get();
// Visit every token produced for the main file of that SourceManager.
609 for (
const auto &Tok : syntax::tokenize(SM.getMainFileID(), SM, LangOpts))
615 llvm::StringMap<unsigned> Identifiers;
616 auto LangOpt = format::getFormattingLangOpts(Style);
617 lex(Content, LangOpt, [&](
const syntax::Token &Tok,
const SourceManager &SM) {
618 if (Tok.kind() == tok::identifier)
619 ++Identifiers[Tok.text(SM)];
621 else if (
const auto *
Keyword = tok::getKeywordSpelling(Tok.kind()))
628 llvm::StringRef Content,
629 const LangOptions &LangOpts) {
630 std::vector<Range> Ranges;
631 lex(Content, LangOpts,
632 [&](
const syntax::Token &Tok,
const SourceManager &SM) {
633 if (Tok.kind() != tok::identifier || Tok.text(SM) !=
Identifier)
641 struct NamespaceEvent {
// Lexes Code and runs a small state machine over the token stream. The
// visible transitions fill `Event` with one of three triggers:
// BeginNamespace, EndNamespace or UsingDirective. Where and how Callback is
// invoked is in lines not shown here.
652 void parseNamespaceEvents(llvm::StringRef
Code,
const LangOptions &LangOpts,
653 llvm::function_ref<
void(NamespaceEvent)>
Callback) {
// Namespace names currently open: pushed alongside BeginNamespace, popped
// alongside EndNamespace.
656 std::vector<std::string> Enclosing;
// One entry per tracked scope: `true` is pushed together with a namespace
// name, `false` otherwise.
658 std::vector<bool> BraceStack;
670 NamespaceEvent
Event;
671 lex(
Code, LangOpts, [&](
const syntax::Token &Tok,
const SourceManager &SM) {
673 switch (Tok.kind()) {
675 State = State == Default ? Using : Default;
677 case tok::kw_namespace:
680 State = UsingNamespace;
// Identifier tokens are appended to NSName; the state records whether the
// name belongs to a using-directive or to a namespace definition.
690 case tok::identifier:
695 case UsingNamespaceName:
696 NSName.append(Tok.text(SM).str());
697 State = UsingNamespaceName;
703 NSName.append(Tok.text(SM).str());
704 State = NamespaceName;
712 case tok::coloncolon:
719 case UsingNamespaceName:
721 State = UsingNamespaceName;
725 State = NamespaceName;
// A scope entered while in NamespaceName is recorded as a namespace scope.
736 if (State == NamespaceName) {
738 BraceStack.push_back(
true);
739 Enclosing.push_back(NSName);
740 Event.Trigger = NamespaceEvent::BeginNamespace;
746 BraceStack.push_back(
false);
// Only a scope recorded as `true` pops Enclosing and reports EndNamespace.
753 if (!BraceStack.empty()) {
754 if (BraceStack.back()) {
756 Enclosing.pop_back();
757 Event.Trigger = NamespaceEvent::EndNamespace;
761 BraceStack.pop_back();
// In UsingNamespaceName at this point, the accumulated name becomes the
// payload of a UsingDirective event.
765 if (State == UsingNamespaceName) {
767 Event.Trigger = NamespaceEvent::UsingDirective;
768 Event.Payload = std::move(NSName);
// Collects prefixes of NS, one per "::"-separated component, preceded by the
// empty prefix. For NS == "a::b" the visible code yields "", "a", "a::b".
// Every element is a view into NS itself. The return statement is in lines
// not shown; presumably Results is returned.
781 llvm::SmallVector<llvm::StringRef, 8> ancestorNamespaces(llvm::StringRef NS) {
782 llvm::SmallVector<llvm::StringRef, 8>
Results;
// Seed with the empty prefix, expressed as a zero-length view of NS.
783 Results.push_back(NS.take_front(0));
// Append the individual components: -1 is MaxSplit (no limit) and false is
// KeepEmpty, so empty pieces are dropped.
784 NS.split(
Results,
"::", -1,
false);
// Widen each piece into the prefix of NS that ends where the piece ends.
785 for (llvm::StringRef &R :
Results)
786 R = NS.take_front(R.end() - NS.begin());
793 const LangOptions &LangOpts) {
796 llvm::StringMap<llvm::StringSet<>> UsingDirectives;
798 parseNamespaceEvents(
Code, LangOpts, [&](NamespaceEvent
Event) {
799 llvm::StringRef NS =
Event.Payload;
800 switch (
Event.Trigger) {
801 case NamespaceEvent::BeginNamespace:
802 case NamespaceEvent::EndNamespace:
803 Current = std::move(Event.Payload);
805 case NamespaceEvent::UsingDirective:
806 if (NS.consume_front(
"::"))
807 UsingDirectives[Current].insert(NS);
809 for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) {
810 if (Enclosing.empty())
811 UsingDirectives[Current].insert(NS);
813 UsingDirectives[Current].insert((Enclosing +
"::" + NS).str());
820 std::vector<std::string> Found;
821 for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) {
822 Found.push_back(std::string(Enclosing));
823 auto It = UsingDirectives.find(Enclosing);
824 if (It != UsingDirectives.end())
825 for (
const auto &Used : It->second)
826 Found.push_back(std::string(Used.getKey()));
829 llvm::sort(Found, [&](
const std::string &LHS,
const std::string &RHS) {
836 Found.erase(std::unique(Found.begin(), Found.end()), Found.end());
844 static constexpr
int MinWordLength = 4;
846 std::vector<CharRole> Roles(Content.size());
849 llvm::StringSet<> Result;
850 llvm::SmallString<256>
Word;
852 if (
Word.size() >= MinWordLength) {
854 C = llvm::toLower(C);
859 for (
unsigned I = 0; I < Content.size(); ++I) {
865 Word.push_back(Content[I]);
879 llvm::StringRef After) {
881 if (Before.endswith(
"`") && After.startswith(
"`"))
884 if (Before.endswith(
"::") || After.startswith(
"::"))
889 Before = Before.take_back(100);
890 auto Pos = Before.find_last_of(
"\\@");
891 if (
Pos != llvm::StringRef::npos) {
892 llvm::StringRef
Tag = Before.substr(
Pos + 1).rtrim(
' ');
893 if (
Tag ==
"p" ||
Tag ==
"c" ||
Tag ==
"class" ||
Tag ==
"tparam" ||
894 Tag ==
"param" ||
Tag ==
"param[in]" ||
Tag ==
"param[out]" ||
895 Tag ==
"param[in,out]" ||
Tag ==
"retval" ||
Tag ==
"throw" ||
896 Tag ==
"throws" ||
Tag ==
"link")
902 if (
Word.contains(
'_')) {
909 bool HasLower =
Word.find_if(clang::isLowercase) != StringRef::npos;
910 bool HasUpper =
Word.substr(1).find_if(clang::isUppercase) != StringRef::npos;
911 if (HasLower && HasUpper) {
// Builds a SpelledWord for the word at SpelledLoc. Two strategies are visible:
// (1) reuse a spelled token touching the location when it is an identifier or
//     keyword;
// (2) otherwise scan the raw buffer text around the location for
//     identifier-body characters.
// The declaration of Result, the return statements and the failure paths are
// in lines not shown.
918 llvm::Optional<SpelledWord> SpelledWord::touching(SourceLocation SpelledLoc,
919 const syntax::TokenBuffer &TB,
920 const LangOptions &LangOpts) {
921 const auto &SM = TB.sourceManager();
922 auto Touching = syntax::spelledTokensTouching(SpelledLoc, TB);
// Strategy 1: a touching token that is an identifier or has a keyword spelling.
923 for (
const auto &T : Touching) {
925 if (tok::isAnyIdentifier(T.kind()) || tok::getKeywordSpelling(T.kind())) {
927 Result.Location = T.location();
928 Result.Text = T.text(SM);
929 Result.LikelyIdentifier = tok::isAnyIdentifier(T.kind());
930 Result.PartOfSpelledToken = &T;
931 Result.SpelledToken = &T;
933 TB.expandedTokens(SM.getMacroArgExpandedLocation(T.location()));
// ExpandedToken is set only when the expansion is exactly one token whose
// text equals the spelled text.
934 if (Expanded.size() == 1 && Expanded.front().text(SM) == Result.Text)
935 Result.ExpandedToken = &Expanded.front();
// Strategy 2: work on the raw text of the file containing SpelledLoc.
941 std::tie(File,
Offset) = SM.getDecomposedLoc(SpelledLoc);
942 bool Invalid =
false;
943 llvm::StringRef
Code = SM.getBufferData(File, &Invalid);
// Grow the range [B, E) over neighbouring identifier-body characters; the
// loop bodies are not shown (presumably --B and ++E).
947 while (B > 0 && isIdentifierBody(
Code[B - 1]))
949 while (
E <
Code.size() && isIdentifierBody(
Code[
E]))
955 Result.Location = SM.getComposedLoc(File, B);
956 Result.Text =
Code.slice(B,
E);
957 Result.LikelyIdentifier =
960 tok::isAnyIdentifier(
961 IdentifierTable(LangOpts).get(Result.Text).getTokenID());
// PartOfSpelledToken ends up as the last touching token whose location is not
// after the start of the word.
962 for (
const auto &T : Touching)
963 if (T.location() <= Result.Location)
964 Result.PartOfSpelledToken = &T;
971 assert(
Loc.isFileID());
972 const auto &SM =
PP.getSourceManager();
973 IdentifierInfo *IdentifierInfo =
PP.getIdentifierInfo(
SpelledTok.text(SM));
974 if (!IdentifierInfo || !IdentifierInfo->hadMacroDefinition())
980 if (SM.getLocForStartOfFile(SM.getFileID(
Loc)) !=
Loc)
981 Loc =
Loc.getLocWithOffset(-1);
982 MacroDefinition MacroDef =
PP.getMacroDefinitionAtLoc(IdentifierInfo,
Loc);
983 if (
auto *MI = MacroDef.getMacroInfo())
985 IdentifierInfo->getName(), MI,
// Applies this edit's Replacements to InitialCode through
// tooling::applyAllReplacements and forwards that call's result: the rewritten
// text on success, or the error it reports.
990 llvm::Expected<std::string> Edit::apply()
const {
991 return tooling::applyAllReplacements(InitialCode, Replacements);
994 std::vector<TextEdit> Edit::asTextEdits()
const {
// Compares Code against InitialCode line by line. The return statements are
// in lines not shown, so the exact result for each case is not visible here.
998 bool Edit::canApplyTo(llvm::StringRef
Code)
const {
// Wrap both texts in memory buffers so llvm::line_iterator can walk them;
// the `false` argument is SkipBlanks, so blank lines are visited too.
1001 auto LHS = llvm::MemoryBuffer::getMemBuffer(
Code);
1002 llvm::line_iterator LHSIt(*LHS,
false);
1004 auto RHS = llvm::MemoryBuffer::getMemBuffer(InitialCode);
1005 llvm::line_iterator RHSIt(*RHS,
false);
// Walk both buffers in lockstep, comparing corresponding lines.
1011 while (!LHSIt.is_at_eof() && !RHSIt.is_at_eof()) {
1012 if (*LHSIt != *RHSIt)
// Whatever remains on either side after the lockstep walk is checked for
// non-empty lines; the action taken on a hit is not shown.
1021 while (!LHSIt.is_at_eof()) {
1022 if (!LHSIt->empty())
1026 while (!RHSIt.is_at_eof()) {
1027 if (!RHSIt->empty())
1036 E.Replacements = std::move(*NewEdits);
1038 return NewEdits.takeError();
1039 return llvm::Error::success();
1043 llvm::StringRef FullyQualifiedName,
1044 const LangOptions &LangOpts) {
1047 std::vector<std::string> Enclosing = {
""};
1051 parseNamespaceEvents(
Code, LangOpts, [&](NamespaceEvent
Event) {
1054 if (
Event.Trigger == NamespaceEvent::UsingDirective)
1057 if (!
Event.Payload.empty())
1058 Event.Payload.append(
"::");
1060 std::string CurrentNamespace;
1061 if (
Event.Trigger == NamespaceEvent::BeginNamespace) {
1062 Enclosing.emplace_back(std::move(
Event.Payload));
1063 CurrentNamespace = Enclosing.back();
1066 ++
Event.Pos.character;
1070 CurrentNamespace = std::move(Enclosing.back());
1071 Enclosing.pop_back();
1072 assert(Enclosing.back() ==
Event.Payload);
1076 if (!FullyQualifiedName.startswith(CurrentNamespace))
1096 llvm::Optional<LangOptions> LangOpts) {
1099 if (LangOpts && LangOpts->IsHeaderFile)
1101 namespace types = clang::driver::types;
1102 auto Lang = types::lookupTypeForExtension(
1103 llvm::sys::path::extension(
FileName).substr(1));
1104 return Lang != types::TY_INVALID && types::onlyPrecompileType(Lang);
1111 auto FID = SM.getFileID(
Loc);
1113 static const char *PROTO_HEADER_COMMENT =
1114 "// Generated by the protocol buffer compiler. DO NOT EDIT!";
1116 return SM.getBufferData(FID).startswith(PROTO_HEADER_COMMENT);