clang-tools  11.0.0
BranchCloneCheck.cpp
Go to the documentation of this file.
1 //===--- BranchCloneCheck.cpp - clang-tidy --------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "BranchCloneCheck.h"
11 #include "clang/AST/ASTContext.h"
12 #include "clang/ASTMatchers/ASTMatchFinder.h"
13 #include "clang/Analysis/CloneDetection.h"
14 #include "clang/Lex/Lexer.h"
15 #include "llvm/Support/Casting.h"
16 
17 using namespace clang;
18 using namespace clang::ast_matchers;
19 
20 /// Returns true when the statements are Type I clones of each other.
21 static bool areStatementsIdentical(const Stmt *LHS, const Stmt *RHS,
22  const ASTContext &Context) {
23  llvm::FoldingSetNodeID DataLHS, DataRHS;
24  LHS->Profile(DataLHS, Context, false);
25  RHS->Profile(DataRHS, Context, false);
26  return (DataLHS == DataRHS);
27 }
28 
29 namespace {
30 /// A branch in a switch may consist of several statements; while a branch in
31 /// an if/else if/else chain is one statement (which may be a CompoundStmt).
32 using SwitchBranch = llvm::SmallVector<const Stmt *, 2>;
33 } // anonymous namespace
34 
35 /// Determines if the bodies of two branches in a switch statements are Type I
36 /// clones of each other. This function only examines the body of the branch
37 /// and ignores the `case X:` or `default:` at the start of the branch.
38 static bool areSwitchBranchesIdentical(const SwitchBranch LHS,
39  const SwitchBranch RHS,
40  const ASTContext &Context) {
41  if (LHS.size() != RHS.size())
42  return false;
43 
44  for (size_t i = 0, Size = LHS.size(); i < Size; i++) {
45  // NOTE: We strip goto labels and annotations in addition to stripping
46  // the `case X:` or `default:` labels, but it is very unlikely that this
47  // would casue false positives in real-world code.
48  if (!areStatementsIdentical(LHS[i]->stripLabelLikeStatements(),
49  RHS[i]->stripLabelLikeStatements(), Context)) {
50  return false;
51  }
52  }
53 
54  return true;
55 }
56 
57 namespace clang {
58 namespace tidy {
59 namespace bugprone {
60 
61 void BranchCloneCheck::registerMatchers(MatchFinder *Finder) {
62  Finder->addMatcher(
63  ifStmt(unless(allOf(isConstexpr(), isInTemplateInstantiation())),
64  stmt().bind("if"),
65  hasParent(stmt(unless(ifStmt(hasElse(equalsBoundNode("if")))))),
66  hasElse(stmt().bind("else"))),
67  this);
68  Finder->addMatcher(switchStmt().bind("switch"), this);
69  Finder->addMatcher(conditionalOperator().bind("condOp"), this);
70 }
71 
72 void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) {
73  const ASTContext &Context = *Result.Context;
74 
75  if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>("if")) {
76  const Stmt *Then = IS->getThen();
77  assert(Then && "An IfStmt must have a `then` branch!");
78 
79  const Stmt *Else = Result.Nodes.getNodeAs<Stmt>("else");
80  assert(Else && "We only look for `if` statements with an `else` branch!");
81 
82  if (!isa<IfStmt>(Else)) {
83  // Just a simple if with no `else if` branch.
84  if (areStatementsIdentical(Then->IgnoreContainers(),
85  Else->IgnoreContainers(), Context)) {
86  diag(IS->getBeginLoc(), "if with identical then and else branches");
87  diag(IS->getElseLoc(), "else branch starts here", DiagnosticIDs::Note);
88  }
89  return;
90  }
91 
92  // This is the complicated case when we start an if/else if/else chain.
93  // To find all the duplicates, we collect all the branches into a vector.
94  llvm::SmallVector<const Stmt *, 4> Branches;
95  const IfStmt *Cur = IS;
96  while (true) {
97  // Store the `then` branch.
98  Branches.push_back(Cur->getThen());
99 
100  Else = Cur->getElse();
101  // The chain ends if there is no `else` branch.
102  if (!Else)
103  break;
104 
105  // Check if there is another `else if`...
106  Cur = dyn_cast<IfStmt>(Else);
107  if (!Cur) {
108  // ...this is just a plain `else` branch at the end of the chain.
109  Branches.push_back(Else);
110  break;
111  }
112  }
113 
114  size_t N = Branches.size();
115  llvm::BitVector KnownAsClone(N);
116 
117  for (size_t i = 0; i + 1 < N; i++) {
118  // We have already seen Branches[i] as a clone of an earlier branch.
119  if (KnownAsClone[i])
120  continue;
121 
122  int NumCopies = 1;
123 
124  for (size_t j = i + 1; j < N; j++) {
125  if (KnownAsClone[j] ||
126  !areStatementsIdentical(Branches[i]->IgnoreContainers(),
127  Branches[j]->IgnoreContainers(), Context))
128  continue;
129 
130  NumCopies++;
131  KnownAsClone[j] = true;
132 
133  if (NumCopies == 2) {
134  // We report the first occurrence only when we find the second one.
135  diag(Branches[i]->getBeginLoc(),
136  "repeated branch in conditional chain");
137  SourceLocation End =
138  Lexer::getLocForEndOfToken(Branches[i]->getEndLoc(), 0,
139  *Result.SourceManager, getLangOpts());
140  if (End.isValid()) {
141  diag(End, "end of the original", DiagnosticIDs::Note);
142  }
143  }
144 
145  diag(Branches[j]->getBeginLoc(), "clone %0 starts here",
146  DiagnosticIDs::Note)
147  << (NumCopies - 1);
148  }
149  }
150  return;
151  }
152 
153  if (const auto *CO = Result.Nodes.getNodeAs<ConditionalOperator>("condOp")) {
154  // We do not try to detect chains of ?: operators.
155  if (areStatementsIdentical(CO->getTrueExpr(), CO->getFalseExpr(), Context))
156  diag(CO->getQuestionLoc(),
157  "conditional operator with identical true and false expressions");
158 
159  return;
160  }
161 
162  if (const auto *SS = Result.Nodes.getNodeAs<SwitchStmt>("switch")) {
163  const CompoundStmt *Body = dyn_cast_or_null<CompoundStmt>(SS->getBody());
164 
165  // Code like
166  // switch (x) case 0: case 1: foobar();
167  // is legal and calls foobar() if and only if x is either 0 or 1;
168  // but we do not try to distinguish branches in such code.
169  if (!Body)
170  return;
171 
172  // We will first collect the branches of the switch statements. For the
173  // sake of simplicity we say that branches are delimited by the SwitchCase
174  // (`case:` or `default:`) children of Body; that is, we ignore `case:` or
175  // `default:` labels embedded inside other statements and we do not follow
176  // the effects of `break` and other manipulation of the control-flow.
177  llvm::SmallVector<SwitchBranch, 4> Branches;
178  for (const Stmt *S : Body->body()) {
179  // If this is a `case` or `default`, we start a new, empty branch.
180  if (isa<SwitchCase>(S))
181  Branches.emplace_back();
182 
183  // There may be code before the first branch (which can be dead code
184  // and can be code reached either through goto or through case labels
185  // that are embedded inside e.g. inner compound statements); we do not
186  // store those statements in branches.
187  if (!Branches.empty())
188  Branches.back().push_back(S);
189  }
190 
191  auto End = Branches.end();
192  auto BeginCurrent = Branches.begin();
193  while (BeginCurrent < End) {
194  auto EndCurrent = BeginCurrent + 1;
195  while (EndCurrent < End &&
196  areSwitchBranchesIdentical(*BeginCurrent, *EndCurrent, Context)) {
197  ++EndCurrent;
198  }
199  // At this point the iterator range {BeginCurrent, EndCurrent} contains a
200  // complete family of consecutive identical branches.
201  if (EndCurrent > BeginCurrent + 1) {
202  diag(BeginCurrent->front()->getBeginLoc(),
203  "switch has %0 consecutive identical branches")
204  << static_cast<int>(std::distance(BeginCurrent, EndCurrent));
205 
206  SourceLocation EndLoc = (EndCurrent - 1)->back()->getEndLoc();
207  // If the case statement is generated from a macro, it's SourceLocation
208  // may be invalid, resulting in an assertion failure down the line.
209  // While not optimal, try the begin location in this case, it's still
210  // better then nothing.
211  if (EndLoc.isInvalid())
212  EndLoc = (EndCurrent - 1)->back()->getBeginLoc();
213 
214  if (EndLoc.isMacroID())
215  EndLoc = Context.getSourceManager().getExpansionLoc(EndLoc);
216  EndLoc = Lexer::getLocForEndOfToken(EndLoc, 0, *Result.SourceManager,
217  getLangOpts());
218 
219  if (EndLoc.isValid()) {
220  diag(EndLoc, "last of these clones ends here", DiagnosticIDs::Note);
221  }
222  }
223  BeginCurrent = EndCurrent;
224  }
225  return;
226  }
227 
228  llvm_unreachable("No if statement and no switch statement.");
229 }
230 
231 } // namespace bugprone
232 } // namespace tidy
233 } // namespace clang
Branches
unsigned Branches
Definition: FunctionSizeCheck.cpp:115
clang::ast_matchers
Definition: AbseilMatcher.h:14
areStatementsIdentical
static bool areStatementsIdentical(const Stmt *LHS, const Stmt *RHS, const ASTContext &Context)
Returns true when the statements are Type I clones of each other.
Definition: BranchCloneCheck.cpp:21
areSwitchBranchesIdentical
static bool areSwitchBranchesIdentical(const SwitchBranch LHS, const SwitchBranch RHS, const ASTContext &Context)
Determines if the bodies of two branches in a switch statements are Type I clones of each other.
Definition: BranchCloneCheck.cpp:38
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
BranchCloneCheck.h