clang-tools  11.0.0
Selection.cpp
Go to the documentation of this file.
1 //===--- Selection.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Selection.h"
10 #include "SourceCode.h"
11 #include "support/Logger.h"
12 #include "support/Trace.h"
13 #include "clang/AST/ASTTypeTraits.h"
14 #include "clang/AST/Decl.h"
15 #include "clang/AST/DeclCXX.h"
16 #include "clang/AST/Expr.h"
17 #include "clang/AST/ExprCXX.h"
18 #include "clang/AST/PrettyPrinter.h"
19 #include "clang/AST/RecursiveASTVisitor.h"
20 #include "clang/AST/TypeLoc.h"
21 #include "clang/Basic/OperatorKinds.h"
22 #include "clang/Basic/SourceLocation.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Basic/TokenKinds.h"
25 #include "clang/Lex/Lexer.h"
26 #include "clang/Tooling/Syntax/Tokens.h"
27 #include "llvm/ADT/STLExtras.h"
28 #include "llvm/ADT/StringExtras.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <algorithm>
32 #include <string>
33 
34 namespace clang {
35 namespace clangd {
36 namespace {
37 using Node = SelectionTree::Node;
38 using ast_type_traits::DynTypedNode;
39 
40 // Measure the fraction of selections that were enabled by recovery AST.
41 void recordMetrics(const SelectionTree &S) {
42  static constexpr trace::Metric SelectionUsedRecovery(
43  "selection_recovery", trace::Metric::Distribution);
44  static constexpr trace::Metric RecoveryType("selection_recovery_type",
46  const auto *Common = S.commonAncestor();
47  for (const auto *N = Common; N; N = N->Parent) {
48  if (const auto *RE = N->ASTNode.get<RecoveryExpr>()) {
49  SelectionUsedRecovery.record(1); // used recovery ast.
50  RecoveryType.record(RE->isTypeDependent() ? 0 : 1);
51  return;
52  }
53  }
54  if (Common)
55  SelectionUsedRecovery.record(0); // unused.
56 }
57 
58 // An IntervalSet maintains a set of disjoint subranges of an array.
59 //
60 // Initially, it contains the entire array.
61 // [-----------------------------------------------------------]
62 //
63 // When a range is erased(), it will typically split the array in two.
64 // Claim: [--------------------]
65 // after: [----------------] [-------------------]
66 //
67 // erase() returns the segments actually erased. Given the state above:
68 // Claim: [---------------------------------------]
69 // Out: [---------] [------]
70 // After: [-----] [-----------]
71 //
72 // It is used to track (expanded) tokens not yet associated with an AST node.
73 // On traversing an AST node, its token range is erased from the unclaimed set.
74 // The tokens actually removed are associated with that node, and hit-tested
75 // against the selection to determine whether the node is selected.
76 template <typename T> class IntervalSet {
77 public:
78  IntervalSet(llvm::ArrayRef<T> Range) { UnclaimedRanges.insert(Range); }
79 
80  // Removes the elements of Claim from the set, modifying or removing ranges
81  // that overlap it.
82  // Returns the continuous subranges of Claim that were actually removed.
83  llvm::SmallVector<llvm::ArrayRef<T>, 4> erase(llvm::ArrayRef<T> Claim) {
84  llvm::SmallVector<llvm::ArrayRef<T>, 4> Out;
85  if (Claim.empty())
86  return Out;
87 
88  // General case:
89  // Claim: [-----------------]
90  // UnclaimedRanges: [-A-] [-B-] [-C-] [-D-] [-E-] [-F-] [-G-]
91  // Overlap: ^first ^second
92  // Ranges C and D are fully included. Ranges B and E must be trimmed.
93  auto Overlap = std::make_pair(
94  UnclaimedRanges.lower_bound({Claim.begin(), Claim.begin()}), // C
95  UnclaimedRanges.lower_bound({Claim.end(), Claim.end()})); // F
96  // Rewind to cover B.
97  if (Overlap.first != UnclaimedRanges.begin()) {
98  --Overlap.first;
99  // ...unless B isn't selected at all.
100  if (Overlap.first->end() <= Claim.begin())
101  ++Overlap.first;
102  }
103  if (Overlap.first == Overlap.second)
104  return Out;
105 
106  // First, copy all overlapping ranges into the output.
107  auto OutFirst = Out.insert(Out.end(), Overlap.first, Overlap.second);
108  // If any of the overlapping ranges were sliced by the claim, split them:
109  // - restrict the returned range to the claimed part
110  // - save the unclaimed part so it can be reinserted
111  llvm::ArrayRef<T> RemainingHead, RemainingTail;
112  if (Claim.begin() > OutFirst->begin()) {
113  RemainingHead = {OutFirst->begin(), Claim.begin()};
114  *OutFirst = {Claim.begin(), OutFirst->end()};
115  }
116  if (Claim.end() < Out.back().end()) {
117  RemainingTail = {Claim.end(), Out.back().end()};
118  Out.back() = {Out.back().begin(), Claim.end()};
119  }
120 
121  // Erase all the overlapping ranges (invalidating all iterators).
122  UnclaimedRanges.erase(Overlap.first, Overlap.second);
123  // Reinsert ranges that were merely trimmed.
124  if (!RemainingHead.empty())
125  UnclaimedRanges.insert(RemainingHead);
126  if (!RemainingTail.empty())
127  UnclaimedRanges.insert(RemainingTail);
128 
129  return Out;
130  }
131 
132 private:
133  using TokenRange = llvm::ArrayRef<T>;
134  struct RangeLess {
135  bool operator()(llvm::ArrayRef<T> L, llvm::ArrayRef<T> R) const {
136  return L.begin() < R.begin();
137  }
138  };
139 
140  // Disjoint sorted unclaimed ranges of expanded tokens.
141  std::set<llvm::ArrayRef<T>, RangeLess> UnclaimedRanges;
142 };
143 
144 // Sentinel value for the selectedness of a node where we've seen no tokens yet.
145 // This resolves to Unselected if no tokens are ever seen.
146 // But Unselected + Complete -> Partial, while NoTokens + Complete --> Complete.
147 // This value is never exposed publicly.
148 constexpr SelectionTree::Selection NoTokens =
149  static_cast<SelectionTree::Selection>(
150  static_cast<unsigned char>(SelectionTree::Complete + 1));
151 
152 // Nodes start with NoTokens, and then use this function to aggregate the
153 // selectedness as more tokens are found.
154 void update(SelectionTree::Selection &Result, SelectionTree::Selection New) {
155  if (New == NoTokens)
156  return;
157  if (Result == NoTokens)
158  Result = New;
159  else if (Result != New)
160  // Can only be completely selected (or unselected) if all tokens are.
161  Result = SelectionTree::Partial;
162 }
163 
164 // As well as comments, don't count semicolons as real tokens.
165 // They're not properly claimed as expr-statement is missing from the AST.
166 bool shouldIgnore(const syntax::Token &Tok) {
167  return Tok.kind() == tok::comment || Tok.kind() == tok::semi;
168 }
169 
170 // Determine whether 'Target' is the first expansion of the macro
171 // argument whose top-level spelling location is 'SpellingLoc'.
172 bool isFirstExpansion(FileID Target, SourceLocation SpellingLoc,
173  const SourceManager &SM) {
174  SourceLocation Prev = SpellingLoc;
175  while (true) {
176  // If the arg is expanded multiple times, getMacroArgExpandedLocation()
177  // returns the first expansion.
178  SourceLocation Next = SM.getMacroArgExpandedLocation(Prev);
179  // So if we reach the target, target is the first-expansion of the
180  // first-expansion ...
181  if (SM.getFileID(Next) == Target)
182  return true;
183 
184  // Otherwise, if the FileID stops changing, we've reached the innermost
185  // macro expansion, and Target was on a different branch.
186  if (SM.getFileID(Next) == SM.getFileID(Prev))
187  return false;
188 
189  Prev = Next;
190  }
191  return false;
192 }
193 
194 // SelectionTester can determine whether a range of tokens from the PP-expanded
195 // stream (corresponding to an AST node) is considered selected.
196 //
197 // When the tokens result from macro expansions, the appropriate tokens in the
198 // main file are examined (macro invocation or args). Similarly for #includes.
199 // However, only the first expansion of a given spelled token is considered
200 // selected.
201 //
202 // It tests each token in the range (not just the endpoints) as contiguous
203 // expanded tokens may not have contiguous spellings (with macros).
204 //
205 // Non-token text, and tokens not modeled in the AST (comments, semicolons)
206 // are ignored when determining selectedness.
207 class SelectionTester {
208 public:
209  // The selection is offsets [SelBegin, SelEnd) in SelFile.
210  SelectionTester(const syntax::TokenBuffer &Buf, FileID SelFile,
211  unsigned SelBegin, unsigned SelEnd, const SourceManager &SM)
212  : SelFile(SelFile), SM(SM) {
213  // Find all tokens (partially) selected in the file.
214  auto AllSpelledTokens = Buf.spelledTokens(SelFile);
215  const syntax::Token *SelFirst =
216  llvm::partition_point(AllSpelledTokens, [&](const syntax::Token &Tok) {
217  return SM.getFileOffset(Tok.endLocation()) <= SelBegin;
218  });
219  const syntax::Token *SelLimit = std::partition_point(
220  SelFirst, AllSpelledTokens.end(), [&](const syntax::Token &Tok) {
221  return SM.getFileOffset(Tok.location()) < SelEnd;
222  });
223  // Precompute selectedness and offset for selected spelled tokens.
224  for (const syntax::Token *T = SelFirst; T < SelLimit; ++T) {
225  if (shouldIgnore(*T))
226  continue;
227  SpelledTokens.emplace_back();
228  Tok &S = SpelledTokens.back();
229  S.Offset = SM.getFileOffset(T->location());
230  if (S.Offset >= SelBegin && S.Offset + T->length() <= SelEnd)
231  S.Selected = SelectionTree::Complete;
232  else
233  S.Selected = SelectionTree::Partial;
234  }
235  }
236 
237  // Test whether a consecutive range of tokens is selected.
238  // The tokens are taken from the expanded token stream.
240  test(llvm::ArrayRef<syntax::Token> ExpandedTokens) const {
241  if (SpelledTokens.empty())
242  return NoTokens;
243  SelectionTree::Selection Result = NoTokens;
244  while (!ExpandedTokens.empty()) {
245  // Take consecutive tokens from the same context together for efficiency.
246  FileID FID = SM.getFileID(ExpandedTokens.front().location());
247  auto Batch = ExpandedTokens.take_while([&](const syntax::Token &T) {
248  return SM.getFileID(T.location()) == FID;
249  });
250  assert(!Batch.empty());
251  ExpandedTokens = ExpandedTokens.drop_front(Batch.size());
252 
253  update(Result, testChunk(FID, Batch));
254  }
255  return Result;
256  }
257 
258  // Cheap check whether any of the tokens in R might be selected.
259  // If it returns false, test() will return NoTokens or Unselected.
260  // If it returns true, test() may return any value.
261  bool mayHit(SourceRange R) const {
262  if (SpelledTokens.empty())
263  return false;
264  auto B = SM.getDecomposedLoc(R.getBegin());
265  auto E = SM.getDecomposedLoc(R.getEnd());
266  if (B.first == SelFile && E.first == SelFile)
267  if (E.second < SpelledTokens.front().Offset ||
268  B.second > SpelledTokens.back().Offset)
269  return false;
270  return true;
271  }
272 
273 private:
274  // Hit-test a consecutive range of tokens from a single file ID.
276  testChunk(FileID FID, llvm::ArrayRef<syntax::Token> Batch) const {
277  assert(!Batch.empty());
278  SourceLocation StartLoc = Batch.front().location();
279  // There are several possible categories of FileID depending on how the
280  // preprocessor was used to generate these tokens:
281  // main file, #included file, macro args, macro bodies.
282  // We need to identify the main-file tokens that represent Batch, and
283  // determine whether we want to exclusively claim them. Regular tokens
284  // represent one AST construct, but a macro invocation can represent many.
285 
286  // Handle tokens written directly in the main file.
287  if (FID == SelFile) {
288  return testTokenRange(SM.getFileOffset(Batch.front().location()),
289  SM.getFileOffset(Batch.back().location()));
290  }
291 
292  // Handle tokens in another file #included into the main file.
293  // Check if the #include is selected, but don't claim it exclusively.
294  if (StartLoc.isFileID()) {
295  for (SourceLocation Loc = Batch.front().location(); Loc.isValid();
296  Loc = SM.getIncludeLoc(SM.getFileID(Loc))) {
297  if (SM.getFileID(Loc) == SelFile)
298  // FIXME: use whole #include directive, not just the filename string.
299  return testToken(SM.getFileOffset(Loc));
300  }
301  return NoTokens;
302  }
303 
304  assert(StartLoc.isMacroID());
305  // Handle tokens that were passed as a macro argument.
306  SourceLocation ArgStart = SM.getTopMacroCallerLoc(StartLoc);
307  if (SM.getFileID(ArgStart) == SelFile) {
308  if (isFirstExpansion(FID, ArgStart, SM)) {
309  SourceLocation ArgEnd =
310  SM.getTopMacroCallerLoc(Batch.back().location());
311  return testTokenRange(SM.getFileOffset(ArgStart),
312  SM.getFileOffset(ArgEnd));
313  } else {
314  /* fall through and treat as part of the macro body */
315  }
316  }
317 
318  // Handle tokens produced by non-argument macro expansion.
319  // Check if the macro name is selected, don't claim it exclusively.
320  auto Expansion = SM.getDecomposedExpansionLoc(StartLoc);
321  if (Expansion.first == SelFile)
322  // FIXME: also check ( and ) for function-like macros?
323  return testToken(Expansion.second);
324  else
325  return NoTokens;
326  }
327 
328  // Is the closed token range [Begin, End] selected?
329  SelectionTree::Selection testTokenRange(unsigned Begin, unsigned End) const {
330  assert(Begin <= End);
331  // Outside the selection entirely?
332  if (End < SpelledTokens.front().Offset ||
333  Begin > SpelledTokens.back().Offset)
335 
336  // Compute range of tokens.
337  auto B = llvm::partition_point(
338  SpelledTokens, [&](const Tok &T) { return T.Offset < Begin; });
339  auto E = std::partition_point(
340  B, SpelledTokens.end(), [&](const Tok &T) { return T.Offset <= End; });
341 
342  // Aggregate selectedness of tokens in range.
343  bool ExtendsOutsideSelection = Begin < SpelledTokens.front().Offset ||
344  End > SpelledTokens.back().Offset;
345  SelectionTree::Selection Result =
346  ExtendsOutsideSelection ? SelectionTree::Unselected : NoTokens;
347  for (auto It = B; It != E; ++It)
348  update(Result, It->Selected);
349  return Result;
350  }
351 
352  // Is the token at `Offset` selected?
353  SelectionTree::Selection testToken(unsigned Offset) const {
354  // Outside the selection entirely?
355  if (Offset < SpelledTokens.front().Offset ||
356  Offset > SpelledTokens.back().Offset)
358  // Find the token, if it exists.
359  auto It = llvm::partition_point(
360  SpelledTokens, [&](const Tok &T) { return T.Offset < Offset; });
361  if (It != SpelledTokens.end() && It->Offset == Offset)
362  return It->Selected;
363  return NoTokens;
364  }
365 
366  struct Tok {
367  unsigned Offset;
369  };
370  std::vector<Tok> SpelledTokens;
371  FileID SelFile;
372  const SourceManager &SM;
373 };
374 
375 // Show the type of a node for debugging.
376 void printNodeKind(llvm::raw_ostream &OS, const DynTypedNode &N) {
377  if (const TypeLoc *TL = N.get<TypeLoc>()) {
378  // TypeLoc is a hierarchy, but has only a single ASTNodeKind.
379  // Synthesize the name from the Type subclass (except for QualifiedTypeLoc).
380  if (TL->getTypeLocClass() == TypeLoc::Qualified)
381  OS << "QualifiedTypeLoc";
382  else
383  OS << TL->getType()->getTypeClassName() << "TypeLoc";
384  } else {
385  OS << N.getNodeKind().asStringRef();
386  }
387 }
388 
389 #ifndef NDEBUG
390 std::string printNodeToString(const DynTypedNode &N, const PrintingPolicy &PP) {
391  std::string S;
392  llvm::raw_string_ostream OS(S);
393  printNodeKind(OS, N);
394  OS << " ";
395  return std::move(OS.str());
396 }
397 #endif
398 
399 bool isImplicit(const Stmt *S) {
400  // Some Stmts are implicit and shouldn't be traversed, but there's no
401  // "implicit" attribute on Stmt/Expr.
402  // Unwrap implicit casts first if present (other nodes too?).
403  if (auto *ICE = llvm::dyn_cast<ImplicitCastExpr>(S))
404  S = ICE->getSubExprAsWritten();
405  // Implicit this in a MemberExpr is not filtered out by RecursiveASTVisitor.
406  // It would be nice if RAV handled this (!shouldTraverseImplicitCode()).
407  if (auto *CTI = llvm::dyn_cast<CXXThisExpr>(S))
408  if (CTI->isImplicit())
409  return true;
410  // Refs to operator() and [] are (almost?) always implicit as part of calls.
411  if (auto *DRE = llvm::dyn_cast<DeclRefExpr>(S)) {
412  if (auto *FD = llvm::dyn_cast<FunctionDecl>(DRE->getDecl())) {
413  switch (FD->getOverloadedOperator()) {
414  case OO_Call:
415  case OO_Subscript:
416  return true;
417  default:
418  break;
419  }
420  }
421  }
422  return false;
423 }
424 
425 // We find the selection by visiting written nodes in the AST, looking for nodes
426 // that intersect with the selected character range.
427 //
428 // While traversing, we maintain a parent stack. As nodes pop off the stack,
429 // we decide whether to keep them or not. To be kept, they must either be
430 // selected or contain some nodes that are.
431 //
432 // For simple cases (not inside macros) we prune subtrees that don't intersect.
433 class SelectionVisitor : public RecursiveASTVisitor<SelectionVisitor> {
434 public:
435  // Runs the visitor to gather selected nodes and their ancestors.
436  // If there is any selection, the root (TUDecl) is the first node.
437  static std::deque<Node> collect(ASTContext &AST,
438  const syntax::TokenBuffer &Tokens,
439  const PrintingPolicy &PP, unsigned Begin,
440  unsigned End, FileID File) {
441  SelectionVisitor V(AST, Tokens, PP, Begin, End, File);
442  V.TraverseAST(AST);
443  assert(V.Stack.size() == 1 && "Unpaired push/pop?");
444  assert(V.Stack.top() == &V.Nodes.front());
445  return std::move(V.Nodes);
446  }
447 
448  // We traverse all "well-behaved" nodes the same way:
449  // - push the node onto the stack
450  // - traverse its children recursively
451  // - pop it from the stack
452  // - hit testing: is intersection(node, selection) - union(children) empty?
453  // - attach it to the tree if it or any children hit the selection
454  //
455  // Two categories of nodes are not "well-behaved":
456  // - those without source range information, we don't record those
457  // - those that can't be stored in DynTypedNode.
458  // We're missing some interesting things like Attr due to the latter.
459  bool TraverseDecl(Decl *X) {
460  if (X && isa<TranslationUnitDecl>(X))
461  return Base::TraverseDecl(X); // Already pushed by constructor.
462  // Base::TraverseDecl will suppress children, but not this node itself.
463  if (X && X->isImplicit())
464  return true;
465  return traverseNode(X, [&] { return Base::TraverseDecl(X); });
466  }
467  bool TraverseTypeLoc(TypeLoc X) {
468  return traverseNode(&X, [&] { return Base::TraverseTypeLoc(X); });
469  }
470  bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X) {
471  return traverseNode(
472  &X, [&] { return Base::TraverseNestedNameSpecifierLoc(X); });
473  }
474  bool TraverseConstructorInitializer(CXXCtorInitializer *X) {
475  return traverseNode(
476  X, [&] { return Base::TraverseConstructorInitializer(X); });
477  }
478  // Stmt is the same, but this form allows the data recursion optimization.
479  bool dataTraverseStmtPre(Stmt *X) {
480  if (!X || isImplicit(X))
481  return false;
482  auto N = DynTypedNode::create(*X);
483  if (canSafelySkipNode(N))
484  return false;
485  push(std::move(N));
486  if (shouldSkipChildren(X)) {
487  pop();
488  return false;
489  }
490  return true;
491  }
492  bool dataTraverseStmtPost(Stmt *X) {
493  pop();
494  return true;
495  }
496  // QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived
497  // TraverseTypeLoc is not called for the inner UnqualTypeLoc.
498  // This means we'd never see 'int' in 'const int'! Work around that here.
499  // (The reason for the behavior is to avoid traversing the nested Type twice,
500  // but we ignore TraverseType anyway).
501  bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QX) {
502  return traverseNode<TypeLoc>(
503  &QX, [&] { return TraverseTypeLoc(QX.getUnqualifiedLoc()); });
504  }
505  // Uninteresting parts of the AST that don't have locations within them.
506  bool TraverseNestedNameSpecifier(NestedNameSpecifier *) { return true; }
507  bool TraverseType(QualType) { return true; }
508 
509  // The DeclStmt for the loop variable claims to cover the whole range
510  // inside the parens, this causes the range-init expression to not be hit.
511  // Traverse the loop VarDecl instead, which has the right source range.
512  bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
513  return traverseNode(S, [&] {
514  return TraverseStmt(S->getInit()) && TraverseDecl(S->getLoopVariable()) &&
515  TraverseStmt(S->getRangeInit()) && TraverseStmt(S->getBody());
516  });
517  }
518  // OpaqueValueExpr blocks traversal, we must explicitly traverse it.
519  bool TraverseOpaqueValueExpr(OpaqueValueExpr *E) {
520  return traverseNode(E, [&] { return TraverseStmt(E->getSourceExpr()); });
521  }
522  // We only want to traverse the *syntactic form* to understand the selection.
523  bool TraversePseudoObjectExpr(PseudoObjectExpr *E) {
524  return traverseNode(E, [&] { return TraverseStmt(E->getSyntacticForm()); });
525  }
526 
527 private:
528  using Base = RecursiveASTVisitor<SelectionVisitor>;
529 
530  SelectionVisitor(ASTContext &AST, const syntax::TokenBuffer &Tokens,
531  const PrintingPolicy &PP, unsigned SelBegin, unsigned SelEnd,
532  FileID SelFile)
533  : SM(AST.getSourceManager()), LangOpts(AST.getLangOpts()),
534 #ifndef NDEBUG
535  PrintPolicy(PP),
536 #endif
537  TokenBuf(Tokens), SelChecker(Tokens, SelFile, SelBegin, SelEnd, SM),
538  UnclaimedExpandedTokens(Tokens.expandedTokens()) {
539  // Ensure we have a node for the TU decl, regardless of traversal scope.
540  Nodes.emplace_back();
541  Nodes.back().ASTNode = DynTypedNode::create(*AST.getTranslationUnitDecl());
542  Nodes.back().Parent = nullptr;
543  Nodes.back().Selected = SelectionTree::Unselected;
544  Stack.push(&Nodes.back());
545  }
546 
547  // Generic case of TraverseFoo. Func should be the call to Base::TraverseFoo.
548  // Node is always a pointer so the generic code can handle any null checks.
549  template <typename T, typename Func>
550  bool traverseNode(T *Node, const Func &Body) {
551  if (Node == nullptr)
552  return true;
553  auto N = DynTypedNode::create(*Node);
554  if (canSafelySkipNode(N))
555  return true;
556  push(DynTypedNode::create(*Node));
557  bool Ret = Body();
558  pop();
559  return Ret;
560  }
561 
562  // HIT TESTING
563  //
564  // We do rough hit testing on the way down the tree to avoid traversing
565  // subtrees that don't touch the selection (canSafelySkipNode), but
566  // fine-grained hit-testing is mostly done on the way back up (in pop()).
567  // This means children get to claim parts of the selection first, and parents
568  // are only selected if they own tokens that no child owned.
569  //
570  // Nodes *usually* nest nicely: a child's getSourceRange() lies within the
571  // parent's, and a node (transitively) owns all tokens in its range.
572  //
573  // Exception 1: child range claims tokens that should be owned by the parent.
574  // e.g. in `void foo(int);`, the FunctionTypeLoc should own
575  // `void (int)` but the parent FunctionDecl should own `foo`.
576  // To handle this case, certain nodes claim small token ranges *before*
577  // their children are traversed. (see earlySourceRange).
578  //
579  // Exception 2: siblings both claim the same node.
580  // e.g. `int x, y;` produces two sibling VarDecls.
581  // ~~~~~ x
582  // ~~~~~~~~ y
583  // Here the first ("leftmost") sibling claims the tokens it wants, and the
584  // other sibling gets what's left. So selecting "int" only includes the left
585  // VarDecl in the selection tree.
586 
587  // An optimization for a common case: nodes outside macro expansions that
588  // don't intersect the selection may be recursively skipped.
589  bool canSafelySkipNode(const DynTypedNode &N) {
590  SourceRange S = N.getSourceRange();
591  if (auto *TL = N.get<TypeLoc>()) {
592  // DeclTypeTypeLoc::getSourceRange() is incomplete, which would lead to
593  // failing
594  // to descend into the child expression.
595  // decltype(2+2);
596  // ~~~~~~~~~~~~~ <-- correct range
597  // ~~~~~~~~ <-- range reported by getSourceRange()
598  // ~~~~~~~~~~~~ <-- range with this hack(i.e, missing closing paren)
599  // FIXME: Alter DecltypeTypeLoc to contain parentheses locations and get
600  // rid of this patch.
601  if (auto DT = TL->getAs<DecltypeTypeLoc>())
602  S.setEnd(DT.getUnderlyingExpr()->getEndLoc());
603  }
604  if (!SelChecker.mayHit(S)) {
605  dlog("{1}skip: {0}", printNodeToString(N, PrintPolicy), indent());
606  dlog("{1}skipped range = {0}", S.printToString(SM), indent(1));
607  return true;
608  }
609  return false;
610  }
611 
612  // There are certain nodes we want to treat as leaves in the SelectionTree,
613  // although they do have children.
614  bool shouldSkipChildren(const Stmt *X) const {
615  // UserDefinedLiteral (e.g. 12_i) has two children (12 and _i).
616  // Unfortunately TokenBuffer sees 12_i as one token and can't split it.
617  // So we treat UserDefinedLiteral as a leaf node, owning the token.
618  return llvm::isa<UserDefinedLiteral>(X);
619  }
620 
621  // Pushes a node onto the ancestor stack. Pairs with pop().
622  // Performs early hit detection for some nodes (on the earlySourceRange).
623  void push(DynTypedNode Node) {
624  SourceRange Early = earlySourceRange(Node);
625  dlog("{1}push: {0}", printNodeToString(Node, PrintPolicy), indent());
626  Nodes.emplace_back();
627  Nodes.back().ASTNode = std::move(Node);
628  Nodes.back().Parent = Stack.top();
629  Nodes.back().Selected = NoTokens;
630  Stack.push(&Nodes.back());
631  claimRange(Early, Nodes.back().Selected);
632  }
633 
634  // Pops a node off the ancestor stack, and finalizes it. Pairs with push().
635  // Performs primary hit detection.
636  void pop() {
637  Node &N = *Stack.top();
638  dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1));
639  claimRange(N.ASTNode.getSourceRange(), N.Selected);
640  if (N.Selected == NoTokens)
641  N.Selected = SelectionTree::Unselected;
642  if (N.Selected || !N.Children.empty()) {
643  // Attach to the tree.
644  N.Parent->Children.push_back(&N);
645  } else {
646  // Neither N any children are selected, it doesn't belong in the tree.
647  assert(&N == &Nodes.back());
648  Nodes.pop_back();
649  }
650  Stack.pop();
651  }
652 
653  // Returns the range of tokens that this node will claim directly, and
654  // is not available to the node's children.
655  // Usually empty, but sometimes children cover tokens but shouldn't own them.
656  SourceRange earlySourceRange(const DynTypedNode &N) {
657  if (const Decl *D = N.get<Decl>()) {
658  // We want constructor name to be claimed by TypeLoc not the constructor
659  // itself. Similar for deduction guides, we rather want to select the
660  // underlying TypeLoc.
661  // FIXME: Unfortunately this doesn't work, even though RecursiveASTVisitor
662  // traverses the underlying TypeLoc inside DeclarationName, it is null for
663  // constructors.
664  if (isa<CXXConstructorDecl>(D) || isa<CXXDeductionGuideDecl>(D))
665  return SourceRange();
666  // This will capture Field, Function, MSProperty, NonTypeTemplateParm and
667  // VarDecls. We want the name in the declarator to be claimed by the decl
668  // and not by any children. For example:
669  // void [[foo]]();
670  // int (*[[s]])();
671  // struct X { int [[hash]] [32]; [[operator]] int();}
672  if (const auto *DD = llvm::dyn_cast<DeclaratorDecl>(D))
673  return DD->getLocation();
674  } else if (const auto *CCI = N.get<CXXCtorInitializer>()) {
675  // : [[b_]](42)
676  return CCI->getMemberLocation();
677  }
678  return SourceRange();
679  }
680 
681  // Perform hit-testing of a complete Node against the selection.
682  // This runs for every node in the AST, and must be fast in common cases.
683  // This is usually called from pop(), so we can take children into account.
684  // The existing state of Result is relevant (early/late claims can interact).
685  void claimRange(SourceRange S, SelectionTree::Selection &Result) {
686  for (const auto &ClaimedRange :
687  UnclaimedExpandedTokens.erase(TokenBuf.expandedTokens(S)))
688  update(Result, SelChecker.test(ClaimedRange));
689 
690  if (Result && Result != NoTokens)
691  dlog("{1}hit selection: {0}", S.printToString(SM), indent());
692  }
693 
694  std::string indent(int Offset = 0) {
695  // Cast for signed arithmetic.
696  int Amount = int(Stack.size()) + Offset;
697  assert(Amount >= 0);
698  return std::string(Amount, ' ');
699  }
700 
701  SourceManager &SM;
702  const LangOptions &LangOpts;
703 #ifndef NDEBUG
704  const PrintingPolicy &PrintPolicy;
705 #endif
706  const syntax::TokenBuffer &TokenBuf;
707  std::stack<Node *> Stack;
708  SelectionTester SelChecker;
709  IntervalSet<syntax::Token> UnclaimedExpandedTokens;
710  std::deque<Node> Nodes; // Stable pointers as we add more nodes.
711 };
712 
713 } // namespace
714 
715 llvm::SmallString<256> abbreviatedString(DynTypedNode N,
716  const PrintingPolicy &PP) {
717  llvm::SmallString<256> Result;
718  {
719  llvm::raw_svector_ostream OS(Result);
720  N.print(OS, PP);
721  }
722  auto Pos = Result.find('\n');
723  if (Pos != llvm::StringRef::npos) {
724  bool MoreText =
725  !llvm::all_of(llvm::StringRef(Result).drop_front(Pos), llvm::isSpace);
726  Result.resize(Pos);
727  if (MoreText)
728  Result.append(" …");
729  }
730  return Result;
731 }
732 
733 void SelectionTree::print(llvm::raw_ostream &OS, const SelectionTree::Node &N,
734  int Indent) const {
735  if (N.Selected)
736  OS.indent(Indent - 1) << (N.Selected == SelectionTree::Complete ? '*'
737  : '.');
738  else
739  OS.indent(Indent);
740  printNodeKind(OS, N.ASTNode);
741  OS << ' ' << abbreviatedString(N.ASTNode, PrintPolicy) << "\n";
742  for (const Node *Child : N.Children)
743  print(OS, *Child, Indent + 2);
744 }
745 
746 std::string SelectionTree::Node::kind() const {
747  std::string S;
748  llvm::raw_string_ostream OS(S);
749  printNodeKind(OS, ASTNode);
750  return std::move(OS.str());
751 }
752 
753 // Decide which selections emulate a "point" query in between characters.
754 // If it's ambiguous (the neighboring characters are selectable tokens), returns
755 // both possibilities in preference order.
756 // Always returns at least one range - if no tokens touched, and empty range.
757 static llvm::SmallVector<std::pair<unsigned, unsigned>, 2>
758 pointBounds(unsigned Offset, const syntax::TokenBuffer &Tokens) {
759  const auto &SM = Tokens.sourceManager();
760  SourceLocation Loc = SM.getComposedLoc(SM.getMainFileID(), Offset);
761  llvm::SmallVector<std::pair<unsigned, unsigned>, 2> Result;
762  // Prefer right token over left.
763  for (const syntax::Token &Tok :
764  llvm::reverse(spelledTokensTouching(Loc, Tokens))) {
765  if (shouldIgnore(Tok))
766  continue;
767  unsigned Offset = Tokens.sourceManager().getFileOffset(Tok.location());
768  Result.emplace_back(Offset, Offset + Tok.length());
769  }
770  if (Result.empty())
771  Result.emplace_back(Offset, Offset);
772  return Result;
773 }
774 
776  const syntax::TokenBuffer &Tokens,
777  unsigned Begin, unsigned End,
778  llvm::function_ref<bool(SelectionTree)> Func) {
779  if (Begin != End)
780  return Func(SelectionTree(AST, Tokens, Begin, End));
781  for (std::pair<unsigned, unsigned> Bounds : pointBounds(Begin, Tokens))
782  if (Func(SelectionTree(AST, Tokens, Bounds.first, Bounds.second)))
783  return true;
784  return false;
785 }
786 
788  const syntax::TokenBuffer &Tokens,
789  unsigned int Begin, unsigned int End) {
790  llvm::Optional<SelectionTree> Result;
791  createEach(AST, Tokens, Begin, End, [&](SelectionTree T) {
792  Result = std::move(T);
793  return true;
794  });
795  return std::move(*Result);
796 }
797 
798 SelectionTree::SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens,
799  unsigned Begin, unsigned End)
800  : PrintPolicy(AST.getLangOpts()) {
801  // No fundamental reason the selection needs to be in the main file,
802  // but that's all clangd has needed so far.
803  const SourceManager &SM = AST.getSourceManager();
804  FileID FID = SM.getMainFileID();
805  PrintPolicy.TerseOutput = true;
806  PrintPolicy.IncludeNewlines = false;
807 
808  dlog("Computing selection for {0}",
809  SourceRange(SM.getComposedLoc(FID, Begin), SM.getComposedLoc(FID, End))
810  .printToString(SM));
811  Nodes = SelectionVisitor::collect(AST, Tokens, PrintPolicy, Begin, End, FID);
812  Root = Nodes.empty() ? nullptr : &Nodes.front();
813  recordMetrics(*this);
814  dlog("Built selection tree\n{0}", *this);
815 }
816 
818  const Node *Ancestor = Root;
819  while (Ancestor->Children.size() == 1 && !Ancestor->Selected)
820  Ancestor = Ancestor->Children.front();
821  // Returning nullptr here is a bit unprincipled, but it makes the API safer:
822  // the TranslationUnitDecl contains all of the preamble, so traversing it is a
823  // performance cliff. Callers can check for null and use root() if they want.
824  return Ancestor != Root ? Ancestor : nullptr;
825 }
826 
827 const DeclContext &SelectionTree::Node::getDeclContext() const {
828  for (const Node *CurrentNode = this; CurrentNode != nullptr;
829  CurrentNode = CurrentNode->Parent) {
830  if (const Decl *Current = CurrentNode->ASTNode.get<Decl>()) {
831  if (CurrentNode != this)
832  if (auto *DC = dyn_cast<DeclContext>(Current))
833  return *DC;
834  return *Current->getDeclContext();
835  }
836  }
837  llvm_unreachable("A tree must always be rooted at TranslationUnitDecl.");
838 }
839 
841  if (Children.size() == 1 &&
842  Children.front()->ASTNode.getSourceRange() == ASTNode.getSourceRange())
843  return Children.front()->ignoreImplicit();
844  return *this;
845 }
846 
848  if (Parent && Parent->ASTNode.getSourceRange() == ASTNode.getSourceRange())
849  return Parent->outerImplicit();
850  return *this;
851 }
852 
853 } // namespace clangd
854 } // namespace clang
dlog
#define dlog(...)
Definition: Logger.h:72
Range
CharSourceRange Range
SourceRange for the file name.
Definition: IncludeOrderCheck.cpp:38
Base
std::unique_ptr< GlobalCompilationDatabase > Base
Definition: GlobalCompilationDatabaseTests.cpp:85
clang::clangd::SelectionTree::Node::kind
std::string kind() const
Definition: Selection.cpp:746
Selection.h
E
const Expr * E
Definition: AvoidBindCheck.cpp:88
clang::clangd::SelectionTree::Node::Children
llvm::SmallVector< const Node *, 8 > Children
Definition: Selection.h:126
clang::clangd::SelectionTree::Node::ignoreImplicit
const Node & ignoreImplicit() const
Definition: Selection.cpp:840
clang::clangd::SelectionTree::createEach
static bool createEach(ASTContext &AST, const syntax::TokenBuffer &Tokens, unsigned Begin, unsigned End, llvm::function_ref< bool(SelectionTree)> Func)
Definition: Selection.cpp:775
clang::clangd::SelectionTree::Node::Selected
Selection Selected
Definition: Selection.h:130
clang::clangd::pointBounds
static llvm::SmallVector< std::pair< unsigned, unsigned >, 2 > pointBounds(unsigned Offset, const syntax::TokenBuffer &Tokens)
Definition: Selection.cpp:758
clang::clangd::X
static URISchemeRegistry::Add< TestScheme > X(TestScheme::Scheme, "Test schema")
Trace.h
Selected
SelectionTree::Selection Selected
Definition: Selection.cpp:368
clang::clangd::SelectionTree::Node::outerImplicit
const Node & outerImplicit() const
Definition: Selection.cpp:847
clang::clangd::SelectionTree::Complete
Definition: Selection.h:118
test
Definition: test.py:1
Offset
unsigned Offset
Definition: Selection.cpp:367
clang::clangd::abbreviatedString
llvm::SmallString< 256 > abbreviatedString(DynTypedNode N, const PrintingPolicy &PP)
Definition: Selection.cpp:715
Decl
const FunctionDecl * Decl
Definition: AvoidBindCheck.cpp:100
clang::clangd::SelectionTree::createRight
static SelectionTree createRight(ASTContext &AST, const syntax::TokenBuffer &Tokens, unsigned Begin, unsigned End)
Definition: Selection.cpp:787
clang::clangd::TypeHierarchyDirection::Children
Logger.h
Bounds
PreambleBounds Bounds
Definition: Preamble.cpp:222
clang::clangd::trace::Metric::Distribution
A distribution of values with a meaningful mean and count.
Definition: Trace.h:52
clang::clangd::SelectionTree
Definition: Selection.h:76
clang::clangd::SymbolOrigin::AST
clang::clangd::SelectionTree::Node
Definition: Selection.h:122
clang::tidy::bugprone::PP
static Preprocessor * PP
Definition: BadSignalToKillThreadCheck.cpp:29
clang::clangd::SelectionTree::Node::Parent
Node * Parent
Definition: Selection.h:124
Parent
const Node * Parent
Definition: ExtractFunction.cpp:148
clang::clangd::SelectionTree::commonAncestor
const Node * commonAncestor() const
Definition: Selection.cpp:817
SourceCode.h
clang::clangd::CompletionItemKind::File
clang::clangd::SelectionTree::Node::ASTNode
ast_type_traits::DynTypedNode ASTNode
Definition: Selection.h:128
clang::clangd::SelectionTree::SelectionTree
SelectionTree(const SelectionTree &)=delete
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
OS
llvm::raw_string_ostream OS
Definition: TraceTests.cpp:162
clang::clangd::SelectionTree::Unselected
Definition: Selection.h:114
Loc
SourceLocation Loc
'#' location in the include directive
Definition: IncludeOrderCheck.cpp:37
Pos
Position Pos
Definition: SourceCode.cpp:649
clang::clangd::SelectionTree::Partial
Definition: Selection.h:116
Out
CompiledFragmentImpl & Out
Definition: ConfigCompile.cpp:70
clang::clangd::SelectionTree::Node::getDeclContext
const DeclContext & getDeclContext() const
Definition: Selection.cpp:827
clang::clangd::SelectionTree::Selection
Selection
Definition: Selection.h:106