clang-tools  10.0.0git
Selection.cpp
Go to the documentation of this file.
1 //===--- Selection.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Selection.h"
10 #include "Logger.h"
11 #include "SourceCode.h"
12 #include "clang/AST/ASTTypeTraits.h"
13 #include "clang/AST/DeclCXX.h"
14 #include "clang/AST/Expr.h"
15 #include "clang/AST/ExprCXX.h"
16 #include "clang/AST/PrettyPrinter.h"
17 #include "clang/AST/RecursiveASTVisitor.h"
18 #include "clang/AST/TypeLoc.h"
19 #include "clang/Basic/OperatorKinds.h"
20 #include "clang/Basic/SourceLocation.h"
21 #include "clang/Basic/SourceManager.h"
22 #include "clang/Basic/TokenKinds.h"
23 #include "clang/Lex/Lexer.h"
24 #include "clang/Tooling/Syntax/Tokens.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/Support/Casting.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <algorithm>
29 #include <string>
30 
31 namespace clang {
32 namespace clangd {
33 namespace {
34 using Node = SelectionTree::Node;
35 using ast_type_traits::DynTypedNode;
36 
37 // An IntervalSet maintains a set of disjoint subranges of an array.
38 //
39 // Initially, it contains the entire array.
40 // [-----------------------------------------------------------]
41 //
42 // When a range is erased(), it will typically split the array in two.
43 // Claim: [--------------------]
44 // after: [----------------] [-------------------]
45 //
46 // erase() returns the segments actually erased. Given the state above:
47 // Claim: [---------------------------------------]
48 // Out: [---------] [------]
49 // After: [-----] [-----------]
50 //
51 // It is used to track (expanded) tokens not yet associated with an AST node.
52 // On traversing an AST node, its token range is erased from the unclaimed set.
53 // The tokens actually removed are associated with that node, and hit-tested
54 // against the selection to determine whether the node is selected.
55 template <typename T>
56 class IntervalSet {
57 public:
58  IntervalSet(llvm::ArrayRef<T> Range) { UnclaimedRanges.insert(Range); }
59 
60  // Removes the elements of Claim from the set, modifying or removing ranges
61  // that overlap it.
62  // Returns the continuous subranges of Claim that were actually removed.
63  llvm::SmallVector<llvm::ArrayRef<T>, 4> erase(llvm::ArrayRef<T> Claim) {
64  llvm::SmallVector<llvm::ArrayRef<T>, 4> Out;
65  if (Claim.empty())
66  return Out;
67 
68  // General case:
69  // Claim: [-----------------]
70  // UnclaimedRanges: [-A-] [-B-] [-C-] [-D-] [-E-] [-F-] [-G-]
71  // Overlap: ^first ^second
72  // Ranges C and D are fully included. Ranges B and E must be trimmed.
73  auto Overlap = std::make_pair(
74  UnclaimedRanges.lower_bound({Claim.begin(), Claim.begin()}), // C
75  UnclaimedRanges.lower_bound({Claim.end(), Claim.end()})); // F
76  // Rewind to cover B.
77  if (Overlap.first != UnclaimedRanges.begin()) {
78  --Overlap.first;
79  // ...unless B isn't selected at all.
80  if (Overlap.first->end() <= Claim.begin())
81  ++Overlap.first;
82  }
83  if (Overlap.first == Overlap.second)
84  return Out;
85 
86  // First, copy all overlapping ranges into the output.
87  auto OutFirst = Out.insert(Out.end(), Overlap.first, Overlap.second);
88  // If any of the overlapping ranges were sliced by the claim, split them:
89  // - restrict the returned range to the claimed part
90  // - save the unclaimed part so it can be reinserted
91  llvm::ArrayRef<T> RemainingHead, RemainingTail;
92  if (Claim.begin() > OutFirst->begin()) {
93  RemainingHead = {OutFirst->begin(), Claim.begin()};
94  *OutFirst = {Claim.begin(), OutFirst->end()};
95  }
96  if (Claim.end() < Out.back().end()) {
97  RemainingTail = {Claim.end(), Out.back().end()};
98  Out.back() = {Out.back().begin(), Claim.end()};
99  }
100 
101  // Erase all the overlapping ranges (invalidating all iterators).
102  UnclaimedRanges.erase(Overlap.first, Overlap.second);
103  // Reinsert ranges that were merely trimmed.
104  if (!RemainingHead.empty())
105  UnclaimedRanges.insert(RemainingHead);
106  if (!RemainingTail.empty())
107  UnclaimedRanges.insert(RemainingTail);
108 
109  return Out;
110  }
111 
112 private:
113  using TokenRange = llvm::ArrayRef<T>;
114  struct RangeLess {
115  bool operator()(llvm::ArrayRef<T> L, llvm::ArrayRef<T> R) const {
116  return L.begin() < R.begin();
117  }
118  };
119 
120  // Disjoint sorted unclaimed ranges of expanded tokens.
121  std::set<llvm::ArrayRef<T>, RangeLess>
122  UnclaimedRanges;
123 };
124 
125 // Sentinel value for the selectedness of a node where we've seen no tokens yet.
126 // This resolves to Unselected if no tokens are ever seen.
127 // But Unselected + Complete -> Partial, while NoTokens + Complete --> Complete.
128 // This value is never exposed publicly.
129 constexpr SelectionTree::Selection NoTokens =
130  static_cast<SelectionTree::Selection>(
131  static_cast<unsigned char>(SelectionTree::Complete + 1));
132 
133 // Nodes start with NoTokens, and then use this function to aggregate the
134 // selectedness as more tokens are found.
135 void update(SelectionTree::Selection &Result, SelectionTree::Selection New) {
136  if (New == NoTokens)
137  return;
138  if (Result == NoTokens)
139  Result = New;
140  else if (Result != New)
141  // Can only be completely selected (or unselected) if all tokens are.
142  Result = SelectionTree::Partial;
143 }
144 
145 
146 // SelectionTester can determine whether a range of tokens from the PP-expanded
147 // stream (corresponding to an AST node) is considered selected.
148 //
149 // When the tokens result from macro expansions, the appropriate tokens in the
150 // main file are examined (macro invocation or args). Similarly for #includes.
151 //
152 // It tests each token in the range (not just the endpoints) as contiguous
153 // expanded tokens may not have contiguous spellings (with macros).
154 //
155 // Non-token text, and tokens not modeled in the AST (comments, semicolons)
156 // are ignored when determining selectedness.
157 class SelectionTester {
158 public:
159  // The selection is offsets [SelBegin, SelEnd) in SelFile.
160  SelectionTester(const syntax::TokenBuffer &Buf, FileID SelFile,
161  unsigned SelBegin, unsigned SelEnd, const SourceManager &SM)
162  : SelFile(SelFile), SM(SM) {
163  // Find all tokens (partially) selected in the file.
164  auto AllSpelledTokens = Buf.spelledTokens(SelFile);
165  const syntax::Token *SelFirst =
166  llvm::partition_point(AllSpelledTokens, [&](const syntax::Token &Tok) {
167  return SM.getFileOffset(Tok.endLocation()) <= SelBegin;
168  });
169  const syntax::Token *SelLimit = std::partition_point(
170  SelFirst, AllSpelledTokens.end(), [&](const syntax::Token &Tok) {
171  return SM.getFileOffset(Tok.location()) < SelEnd;
172  });
173  // Precompute selectedness and offset for selected spelled tokens.
174  for (const syntax::Token *T = SelFirst; T < SelLimit; ++T) {
175  // As well as comments, don't count semicolons as real tokens.
176  // They're not properly claimed as expr-statement is missing from the AST.
177  if (T->kind() == tok::comment || T->kind() == tok::semi)
178  continue;
179  SpelledTokens.emplace_back();
180  Tok &S = SpelledTokens.back();
181  S.Offset = SM.getFileOffset(T->location());
182  if (S.Offset >= SelBegin && S.Offset + T->length() <= SelEnd)
183  S.Selected = SelectionTree::Complete;
184  else
185  S.Selected = SelectionTree::Partial;
186  }
187  }
188 
189  // Test whether a consecutive range of tokens is selected.
190  // The tokens are taken from the expanded token stream.
192  test(llvm::ArrayRef<syntax::Token> ExpandedTokens) const {
193  if (SpelledTokens.empty())
194  return NoTokens;
195  SelectionTree::Selection Result = NoTokens;
196  while (!ExpandedTokens.empty()) {
197  // Take consecutive tokens from the same context together for efficiency.
198  FileID FID = SM.getFileID(ExpandedTokens.front().location());
199  auto Batch = ExpandedTokens.take_while([&](const syntax::Token &T) {
200  return SM.getFileID(T.location()) == FID;
201  });
202  assert(!Batch.empty());
203  ExpandedTokens = ExpandedTokens.drop_front(Batch.size());
204 
205  update(Result, testChunk(FID, Batch));
206  }
207  return Result;
208  }
209 
210  // Cheap check whether any of the tokens in R might be selected.
211  // If it returns false, test() will return NoTokens or Unselected.
212  // If it returns true, test() may return any value.
213  bool mayHit(SourceRange R) const {
214  if (SpelledTokens.empty())
215  return false;
216  auto B = SM.getDecomposedLoc(R.getBegin());
217  auto E = SM.getDecomposedLoc(R.getEnd());
218  if (B.first == SelFile && E.first == SelFile)
219  if (E.second < SpelledTokens.front().Offset ||
220  B.second > SpelledTokens.back().Offset)
221  return false;
222  return true;
223  }
224 
225 private:
226  // Hit-test a consecutive range of tokens from a single file ID.
228  testChunk(FileID FID, llvm::ArrayRef<syntax::Token> Batch) const {
229  assert(!Batch.empty());
230  SourceLocation StartLoc = Batch.front().location();
231  // There are several possible categories of FileID depending on how the
232  // preprocessor was used to generate these tokens:
233  // main file, #included file, macro args, macro bodies.
234  // We need to identify the main-file tokens that represent Batch, and
235  // determine whether we want to exclusively claim them. Regular tokens
236  // represent one AST construct, but a macro invocation can represent many.
237 
238  // Handle tokens written directly in the main file.
239  if (FID == SelFile) {
240  return testTokenRange(SM.getFileOffset(Batch.front().location()),
241  SM.getFileOffset(Batch.back().location()));
242  }
243 
244  // Handle tokens in another file #included into the main file.
245  // Check if the #include is selected, but don't claim it exclusively.
246  if (StartLoc.isFileID()) {
247  for (SourceLocation Loc = Batch.front().location(); Loc.isValid();
248  Loc = SM.getIncludeLoc(SM.getFileID(Loc))) {
249  if (SM.getFileID(Loc) == SelFile)
250  // FIXME: use whole #include directive, not just the filename string.
251  return testToken(SM.getFileOffset(Loc));
252  }
253  return NoTokens;
254  }
255 
256  assert(StartLoc.isMacroID());
257  // Handle tokens that were passed as a macro argument.
258  SourceLocation ArgStart = SM.getTopMacroCallerLoc(StartLoc);
259  if (SM.getFileID(ArgStart) == SelFile) {
260  SourceLocation ArgEnd = SM.getTopMacroCallerLoc(Batch.back().location());
261  return testTokenRange(SM.getFileOffset(ArgStart),
262  SM.getFileOffset(ArgEnd));
263  }
264 
265  // Handle tokens produced by non-argument macro expansion.
266  // Check if the macro name is selected, don't claim it exclusively.
267  auto Expansion = SM.getDecomposedExpansionLoc(StartLoc);
268  if (Expansion.first == SelFile)
269  // FIXME: also check ( and ) for function-like macros?
270  return testToken(Expansion.second);
271  else
272  return NoTokens;
273  }
274 
275  // Is the closed token range [Begin, End] selected?
276  SelectionTree::Selection testTokenRange(unsigned Begin, unsigned End) const {
277  assert(Begin <= End);
278  // Outside the selection entirely?
279  if (End < SpelledTokens.front().Offset ||
280  Begin > SpelledTokens.back().Offset)
282 
283  // Compute range of tokens.
284  auto B = llvm::partition_point(
285  SpelledTokens, [&](const Tok &T) { return T.Offset < Begin; });
286  auto E = std::partition_point(
287  B, SpelledTokens.end(), [&](const Tok &T) { return T.Offset <= End; });
288 
289  // Aggregate selectedness of tokens in range.
290  bool ExtendsOutsideSelection = Begin < SpelledTokens.front().Offset ||
291  End > SpelledTokens.back().Offset;
292  SelectionTree::Selection Result =
293  ExtendsOutsideSelection ? SelectionTree::Unselected : NoTokens;
294  for (auto It = B; It != E; ++It)
295  update(Result, It->Selected);
296  return Result;
297  }
298 
299  // Is the token at `Offset` selected?
300  SelectionTree::Selection testToken(unsigned Offset) const {
301  // Outside the selection entirely?
302  if (Offset < SpelledTokens.front().Offset ||
303  Offset > SpelledTokens.back().Offset)
305  // Find the token, if it exists.
306  auto It = llvm::partition_point(
307  SpelledTokens, [&](const Tok &T) { return T.Offset < Offset; });
308  if (It != SpelledTokens.end() && It->Offset == Offset)
309  return It->Selected;
310  return NoTokens;
311  }
312 
313  struct Tok {
314  unsigned Offset;
316  };
317  std::vector<Tok> SpelledTokens;
318  FileID SelFile;
319  const SourceManager &SM;
320 };
321 
322 // Show the type of a node for debugging.
323 void printNodeKind(llvm::raw_ostream &OS, const DynTypedNode &N) {
324  if (const TypeLoc *TL = N.get<TypeLoc>()) {
325  // TypeLoc is a hierarchy, but has only a single ASTNodeKind.
326  // Synthesize the name from the Type subclass (except for QualifiedTypeLoc).
327  if (TL->getTypeLocClass() == TypeLoc::Qualified)
328  OS << "QualifiedTypeLoc";
329  else
330  OS << TL->getType()->getTypeClassName() << "TypeLoc";
331  } else {
332  OS << N.getNodeKind().asStringRef();
333  }
334 }
335 
336 #ifndef NDEBUG
337 std::string printNodeToString(const DynTypedNode &N, const PrintingPolicy &PP) {
338  std::string S;
339  llvm::raw_string_ostream OS(S);
340  printNodeKind(OS, N);
341  OS << " ";
342  return std::move(OS.str());
343 }
344 #endif
345 
346 bool isImplicit(const Stmt* S) {
347  // Some Stmts are implicit and shouldn't be traversed, but there's no
348  // "implicit" attribute on Stmt/Expr.
349  // Unwrap implicit casts first if present (other nodes too?).
350  if (auto *ICE = llvm::dyn_cast<ImplicitCastExpr>(S))
351  S = ICE->getSubExprAsWritten();
352  // Implicit this in a MemberExpr is not filtered out by RecursiveASTVisitor.
353  // It would be nice if RAV handled this (!shouldTraverseImplicitCode()).
354  if (auto *CTI = llvm::dyn_cast<CXXThisExpr>(S))
355  if (CTI->isImplicit())
356  return true;
357  // Refs to operator() and [] are (almost?) always implicit as part of calls.
358  if (auto *DRE = llvm::dyn_cast<DeclRefExpr>(S)) {
359  if (auto *FD = llvm::dyn_cast<FunctionDecl>(DRE->getDecl())) {
360  switch (FD->getOverloadedOperator()) {
361  case OO_Call:
362  case OO_Subscript:
363  return true;
364  default:
365  break;
366  }
367  }
368  }
369  return false;
370 }
371 
372 // We find the selection by visiting written nodes in the AST, looking for nodes
373 // that intersect with the selected character range.
374 //
375 // While traversing, we maintain a parent stack. As nodes pop off the stack,
376 // we decide whether to keep them or not. To be kept, they must either be
377 // selected or contain some nodes that are.
378 //
379 // For simple cases (not inside macros) we prune subtrees that don't intersect.
380 class SelectionVisitor : public RecursiveASTVisitor<SelectionVisitor> {
381 public:
382  // Runs the visitor to gather selected nodes and their ancestors.
383  // If there is any selection, the root (TUDecl) is the first node.
384  static std::deque<Node> collect(ASTContext &AST,
385  const syntax::TokenBuffer &Tokens,
386  const PrintingPolicy &PP, unsigned Begin,
387  unsigned End, FileID File) {
388  SelectionVisitor V(AST, Tokens, PP, Begin, End, File);
389  V.TraverseAST(AST);
390  assert(V.Stack.size() == 1 && "Unpaired push/pop?");
391  assert(V.Stack.top() == &V.Nodes.front());
392  return std::move(V.Nodes);
393  }
394 
395  // We traverse all "well-behaved" nodes the same way:
396  // - push the node onto the stack
397  // - traverse its children recursively
398  // - pop it from the stack
399  // - hit testing: is intersection(node, selection) - union(children) empty?
400  // - attach it to the tree if it or any children hit the selection
401  //
402  // Two categories of nodes are not "well-behaved":
403  // - those without source range information, we don't record those
404  // - those that can't be stored in DynTypedNode.
405  // We're missing some interesting things like Attr due to the latter.
406  bool TraverseDecl(Decl *X) {
407  if (X && isa<TranslationUnitDecl>(X))
408  return Base::TraverseDecl(X); // Already pushed by constructor.
409  // Base::TraverseDecl will suppress children, but not this node itself.
410  if (X && X->isImplicit())
411  return true;
412  return traverseNode(X, [&] { return Base::TraverseDecl(X); });
413  }
414  bool TraverseTypeLoc(TypeLoc X) {
415  return traverseNode(&X, [&] { return Base::TraverseTypeLoc(X); });
416  }
417  bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X) {
418  return traverseNode(
419  &X, [&] { return Base::TraverseNestedNameSpecifierLoc(X); });
420  }
421  bool TraverseConstructorInitializer(CXXCtorInitializer *X) {
422  return traverseNode(
423  X, [&] { return Base::TraverseConstructorInitializer(X); });
424  }
425  // Stmt is the same, but this form allows the data recursion optimization.
426  bool dataTraverseStmtPre(Stmt *X) {
427  if (!X || isImplicit(X))
428  return false;
429  auto N = DynTypedNode::create(*X);
430  if (canSafelySkipNode(N))
431  return false;
432  push(std::move(N));
433  if (shouldSkipChildren(X)) {
434  pop();
435  return false;
436  }
437  return true;
438  }
439  bool dataTraverseStmtPost(Stmt *X) {
440  pop();
441  return true;
442  }
443  // QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived
444  // TraverseTypeLoc is not called for the inner UnqualTypeLoc.
445  // This means we'd never see 'int' in 'const int'! Work around that here.
446  // (The reason for the behavior is to avoid traversing the nested Type twice,
447  // but we ignore TraverseType anyway).
448  bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QX) {
449  return traverseNode<TypeLoc>(
450  &QX, [&] { return TraverseTypeLoc(QX.getUnqualifiedLoc()); });
451  }
452  // Uninteresting parts of the AST that don't have locations within them.
453  bool TraverseNestedNameSpecifier(NestedNameSpecifier *) { return true; }
454  bool TraverseType(QualType) { return true; }
455 
456  // The DeclStmt for the loop variable claims to cover the whole range
457  // inside the parens, this causes the range-init expression to not be hit.
458  // Traverse the loop VarDecl instead, which has the right source range.
459  bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
460  return traverseNode(S, [&] {
461  return TraverseStmt(S->getInit()) && TraverseDecl(S->getLoopVariable()) &&
462  TraverseStmt(S->getRangeInit()) && TraverseStmt(S->getBody());
463  });
464  }
465 
466 private:
467  using Base = RecursiveASTVisitor<SelectionVisitor>;
468 
469  SelectionVisitor(ASTContext &AST, const syntax::TokenBuffer &Tokens,
470  const PrintingPolicy &PP, unsigned SelBegin, unsigned SelEnd,
471  FileID SelFile)
472  : SM(AST.getSourceManager()), LangOpts(AST.getLangOpts()),
473 #ifndef NDEBUG
474  PrintPolicy(PP),
475 #endif
476  TokenBuf(Tokens), SelChecker(Tokens, SelFile, SelBegin, SelEnd, SM),
477  UnclaimedExpandedTokens(Tokens.expandedTokens()) {
478  // Ensure we have a node for the TU decl, regardless of traversal scope.
479  Nodes.emplace_back();
480  Nodes.back().ASTNode = DynTypedNode::create(*AST.getTranslationUnitDecl());
481  Nodes.back().Parent = nullptr;
482  Nodes.back().Selected = SelectionTree::Unselected;
483  Stack.push(&Nodes.back());
484  }
485 
486  // Generic case of TraverseFoo. Func should be the call to Base::TraverseFoo.
487  // Node is always a pointer so the generic code can handle any null checks.
488  template <typename T, typename Func>
489  bool traverseNode(T *Node, const Func &Body) {
490  if (Node == nullptr)
491  return true;
492  auto N = DynTypedNode::create(*Node);
493  if (canSafelySkipNode(N))
494  return true;
495  push(DynTypedNode::create(*Node));
496  bool Ret = Body();
497  pop();
498  return Ret;
499  }
500 
501  // HIT TESTING
502  //
503  // We do rough hit testing on the way down the tree to avoid traversing
504  // subtrees that don't touch the selection (canSafelySkipNode), but
505  // fine-grained hit-testing is mostly done on the way back up (in pop()).
506  // This means children get to claim parts of the selection first, and parents
507  // are only selected if they own tokens that no child owned.
508  //
509  // Nodes *usually* nest nicely: a child's getSourceRange() lies within the
510  // parent's, and a node (transitively) owns all tokens in its range.
511  //
512  // Exception 1: child range claims tokens that should be owned by the parent.
513  // e.g. in `void foo(int);`, the FunctionTypeLoc should own
514  // `void (int)` but the parent FunctionDecl should own `foo`.
515  // To handle this case, certain nodes claim small token ranges *before*
516  // their children are traversed. (see earlySourceRange).
517  //
518  // Exception 2: siblings both claim the same node.
519  // e.g. `int x, y;` produces two sibling VarDecls.
520  // ~~~~~ x
521  // ~~~~~~~~ y
522  // Here the first ("leftmost") sibling claims the tokens it wants, and the
523  // other sibling gets what's left. So selecting "int" only includes the left
524  // VarDecl in the selection tree.
525 
526  // An optimization for a common case: nodes outside macro expansions that
527  // don't intersect the selection may be recursively skipped.
528  bool canSafelySkipNode(const DynTypedNode &N) {
529  SourceRange S = N.getSourceRange();
530  if (auto *TL = N.get<TypeLoc>()) {
531  // DeclTypeTypeLoc::getSourceRange() is incomplete, which would lead to
532  // failing
533  // to descend into the child expression.
534  // decltype(2+2);
535  // ~~~~~~~~~~~~~ <-- correct range
536  // ~~~~~~~~ <-- range reported by getSourceRange()
537  // ~~~~~~~~~~~~ <-- range with this hack(i.e, missing closing paren)
538  // FIXME: Alter DecltypeTypeLoc to contain parentheses locations and get
539  // rid of this patch.
540  if (auto DT = TL->getAs<DecltypeTypeLoc>())
541  S.setEnd(DT.getUnderlyingExpr()->getEndLoc());
542  }
543  if (!SelChecker.mayHit(S)) {
544  dlog("{1}skip: {0}", printNodeToString(N, PrintPolicy), indent());
545  dlog("{1}skipped range = {0}", S.printToString(SM), indent(1));
546  return true;
547  }
548  return false;
549  }
550 
551  // There are certain nodes we want to treat as leaves in the SelectionTree,
552  // although they do have children.
553  bool shouldSkipChildren(const Stmt *X) const {
554  // UserDefinedLiteral (e.g. 12_i) has two children (12 and _i).
555  // Unfortunately TokenBuffer sees 12_i as one token and can't split it.
556  // So we treat UserDefinedLiteral as a leaf node, owning the token.
557  return llvm::isa<UserDefinedLiteral>(X);
558  }
559 
560  // Pushes a node onto the ancestor stack. Pairs with pop().
561  // Performs early hit detection for some nodes (on the earlySourceRange).
562  void push(DynTypedNode Node) {
563  SourceRange Early = earlySourceRange(Node);
564  dlog("{1}push: {0}", printNodeToString(Node, PrintPolicy), indent());
565  Nodes.emplace_back();
566  Nodes.back().ASTNode = std::move(Node);
567  Nodes.back().Parent = Stack.top();
568  Nodes.back().Selected = NoTokens;
569  Stack.push(&Nodes.back());
570  claimRange(Early, Nodes.back().Selected);
571  }
572 
573  // Pops a node off the ancestor stack, and finalizes it. Pairs with push().
574  // Performs primary hit detection.
575  void pop() {
576  Node &N = *Stack.top();
577  dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1));
578  claimRange(N.ASTNode.getSourceRange(), N.Selected);
579  if (N.Selected == NoTokens)
580  N.Selected = SelectionTree::Unselected;
581  if (N.Selected || !N.Children.empty()) {
582  // Attach to the tree.
583  N.Parent->Children.push_back(&N);
584  } else {
585  // Neither N any children are selected, it doesn't belong in the tree.
586  assert(&N == &Nodes.back());
587  Nodes.pop_back();
588  }
589  Stack.pop();
590  }
591 
592  // Returns the range of tokens that this node will claim directly, and
593  // is not available to the node's children.
594  // Usually empty, but sometimes children cover tokens but shouldn't own them.
595  SourceRange earlySourceRange(const DynTypedNode &N) {
596  if (const Decl *D = N.get<Decl>()) {
597  // void [[foo]]();
598  if (auto *FD = llvm::dyn_cast<FunctionDecl>(D))
599  return FD->getNameInfo().getSourceRange();
600  // int (*[[s]])();
601  else if (auto *VD = llvm::dyn_cast<VarDecl>(D))
602  return VD->getLocation();
603  } else if (const auto* CCI = N.get<CXXCtorInitializer>()) {
604  // : [[b_]](42)
605  return CCI->getMemberLocation();
606  }
607  return SourceRange();
608  }
609 
610  // Perform hit-testing of a complete Node against the selection.
611  // This runs for every node in the AST, and must be fast in common cases.
612  // This is usually called from pop(), so we can take children into account.
613  // The existing state of Result is relevant (early/late claims can interact).
614  void claimRange(SourceRange S, SelectionTree::Selection &Result) {
615  for (const auto &ClaimedRange :
616  UnclaimedExpandedTokens.erase(TokenBuf.expandedTokens(S)))
617  update(Result, SelChecker.test(ClaimedRange));
618 
619  if (Result && Result != NoTokens)
620  dlog("{1}hit selection: {0}", S.printToString(SM), indent());
621  }
622 
623  std::string indent(int Offset = 0) {
624  // Cast for signed arithmetic.
625  int Amount = int(Stack.size()) + Offset;
626  assert(Amount >= 0);
627  return std::string(Amount, ' ');
628  }
629 
630  SourceManager &SM;
631  const LangOptions &LangOpts;
632 #ifndef NDEBUG
633  const PrintingPolicy &PrintPolicy;
634 #endif
635  const syntax::TokenBuffer &TokenBuf;
636  std::stack<Node *> Stack;
637  SelectionTester SelChecker;
638  IntervalSet<syntax::Token> UnclaimedExpandedTokens;
639  std::deque<Node> Nodes; // Stable pointers as we add more nodes.
640 };
641 
642 } // namespace
643 
644 void SelectionTree::print(llvm::raw_ostream &OS, const SelectionTree::Node &N,
645  int Indent) const {
646  if (N.Selected)
647  OS.indent(Indent - 1) << (N.Selected == SelectionTree::Complete ? '*'
648  : '.');
649  else
650  OS.indent(Indent);
651  printNodeKind(OS, N.ASTNode);
652  OS << ' ';
653  N.ASTNode.print(OS, PrintPolicy);
654  OS << "\n";
655  for (const Node *Child : N.Children)
656  print(OS, *Child, Indent + 2);
657 }
658 
659 std::string SelectionTree::Node::kind() const {
660  std::string S;
661  llvm::raw_string_ostream OS(S);
662  printNodeKind(OS, ASTNode);
663  return std::move(OS.str());
664 }
665 
666 // Decide which selection emulates a "point" query in between characters.
667 static std::pair<unsigned, unsigned> pointBounds(unsigned Offset, FileID FID,
668  ASTContext &AST) {
669  StringRef Buf = AST.getSourceManager().getBufferData(FID);
670  // Edge-cases where the choice is forced.
671  if (Buf.size() == 0)
672  return {0, 0};
673  if (Offset == 0)
674  return {0, 1};
675  if (Offset == Buf.size())
676  return {Offset - 1, Offset};
677  // We could choose either this byte or the previous. Usually we prefer the
678  // character on the right of the cursor (or under a block cursor).
679  // But if that's whitespace/semicolon, we likely want the token on the left.
680  auto IsIgnoredChar = [](char C) { return isWhitespace(C) || C == ';'; };
681  if (IsIgnoredChar(Buf[Offset]) && !IsIgnoredChar(Buf[Offset - 1]))
682  return {Offset - 1, Offset};
683  return {Offset, Offset + 1};
684 }
685 
686 SelectionTree::SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens,
687  unsigned Begin, unsigned End)
688  : PrintPolicy(AST.getLangOpts()) {
689  // No fundamental reason the selection needs to be in the main file,
690  // but that's all clangd has needed so far.
691  const SourceManager &SM = AST.getSourceManager();
692  FileID FID = SM.getMainFileID();
693  if (Begin == End)
694  std::tie(Begin, End) = pointBounds(Begin, FID, AST);
695  PrintPolicy.TerseOutput = true;
696  PrintPolicy.IncludeNewlines = false;
697 
698  dlog("Computing selection for {0}",
699  SourceRange(SM.getComposedLoc(FID, Begin), SM.getComposedLoc(FID, End))
700  .printToString(SM));
701  Nodes = SelectionVisitor::collect(AST, Tokens, PrintPolicy, Begin, End, FID);
702  Root = Nodes.empty() ? nullptr : &Nodes.front();
703  dlog("Built selection tree\n{0}", *this);
704 }
705 
706 SelectionTree::SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens,
707  unsigned Offset)
708  : SelectionTree(AST, Tokens, Offset, Offset) {}
709 
711  const Node *Ancestor = Root;
712  while (Ancestor->Children.size() == 1 && !Ancestor->Selected)
713  Ancestor = Ancestor->Children.front();
714  // Returning nullptr here is a bit unprincipled, but it makes the API safer:
715  // the TranslationUnitDecl contains all of the preamble, so traversing it is a
716  // performance cliff. Callers can check for null and use root() if they want.
717  return Ancestor != Root ? Ancestor : nullptr;
718 }
719 
720 const DeclContext& SelectionTree::Node::getDeclContext() const {
721  for (const Node* CurrentNode = this; CurrentNode != nullptr;
722  CurrentNode = CurrentNode->Parent) {
723  if (const Decl* Current = CurrentNode->ASTNode.get<Decl>()) {
724  if (CurrentNode != this)
725  if (auto *DC = dyn_cast<DeclContext>(Current))
726  return *DC;
727  return *Current->getDeclContext();
728  }
729  }
730  llvm_unreachable("A tree must always be rooted at TranslationUnitDecl.");
731 }
732 
734  if (Children.size() == 1 &&
735  Children.front()->ASTNode.getSourceRange() == ASTNode.getSourceRange())
736  return Children.front()->ignoreImplicit();
737  return *this;
738 }
739 
741  if (Parent && Parent->ASTNode.getSourceRange() == ASTNode.getSourceRange())
742  return Parent->outerImplicit();
743  return *this;
744 }
745 
746 } // namespace clangd
747 } // namespace clang
SourceLocation Loc
'#' location in the include directive
const FunctionDecl * Decl
const Node * Parent
llvm::SmallVector< const Node *, 8 > Children
Definition: Selection.h:99
const Node & ignoreImplicit() const
Definition: Selection.cpp:733
Definition: test.py:1
SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens, unsigned Offset)
Definition: Selection.cpp:706
const Node & outerImplicit() const
Definition: Selection.cpp:740
static std::pair< unsigned, unsigned > pointBounds(unsigned Offset, FileID FID, ASTContext &AST)
Definition: Selection.cpp:667
unsigned Offset
Definition: Selection.cpp:314
#define dlog(...)
Definition: Logger.h:72
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
static URISchemeRegistry::Add< TestScheme > X(TestScheme::Scheme, "Test schema")
CharSourceRange Range
SourceRange for the file name.
ast_type_traits::DynTypedNode ASTNode
Definition: Selection.h:101
const Expr * E
const Node * commonAncestor() const
Definition: Selection.cpp:710
std::unique_ptr< GlobalCompilationDatabase > Base
SelectionTree::Selection Selected
Definition: Selection.cpp:315
const DeclContext & getDeclContext() const
Definition: Selection.cpp:720