clang-tools  10.0.0git
Quality.cpp
Go to the documentation of this file.
1 //===--- Quality.cpp ---------------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Quality.h"
10 #include "AST.h"
11 #include "FileDistance.h"
12 #include "SourceCode.h"
13 #include "URI.h"
14 #include "index/Symbol.h"
15 #include "clang/AST/ASTContext.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/DeclTemplate.h"
19 #include "clang/AST/DeclVisitor.h"
20 #include "clang/Basic/CharInfo.h"
21 #include "clang/Basic/SourceManager.h"
22 #include "clang/Sema/CodeCompleteConsumer.h"
23 #include "llvm/ADT/ArrayRef.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/StringExtras.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/Support/Casting.h"
29 #include "llvm/Support/FormatVariadic.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include <algorithm>
33 #include <cmath>
34 
35 namespace clang {
36 namespace clangd {
37 static bool isReserved(llvm::StringRef Name) {
38  // FIXME: Should we exclude _Bool and others recognized by the standard?
39  return Name.size() >= 2 && Name[0] == '_' &&
40  (isUppercase(Name[1]) || Name[1] == '_');
41 }
42 
43 static bool hasDeclInMainFile(const Decl &D) {
44  auto &SourceMgr = D.getASTContext().getSourceManager();
45  for (auto *Redecl : D.redecls()) {
46  if (isInsideMainFile(Redecl->getLocation(), SourceMgr))
47  return true;
48  }
49  return false;
50 }
51 
52 static bool hasUsingDeclInMainFile(const CodeCompletionResult &R) {
53  const auto &Context = R.Declaration->getASTContext();
54  const auto &SourceMgr = Context.getSourceManager();
55  if (R.ShadowDecl) {
56  if (isInsideMainFile(R.ShadowDecl->getLocation(), SourceMgr))
57  return true;
58  }
59  return false;
60 }
61 
// Maps a declaration to a SymbolQualitySignals::SymbolCategory.
// Function declarations are first checked for being overloaded operators;
// everything else is dispatched through a ConstDeclVisitor.
// NOTE(review): listing line 65 (the statement controlled by the
// isOverloadedOperator() check) is absent from this copy of the file.
62 static SymbolQualitySignals::SymbolCategory categorize(const NamedDecl &ND) {
63  if (const auto *FD = dyn_cast<FunctionDecl>(&ND)) {
64  if (FD->isOverloadedOperator())
66  }
// Local visitor: each MAP(DeclType, Category) entry below defines a
// Visit<DeclType> method that returns SymbolQualitySignals::Category.
67  class Switch
68  : public ConstDeclVisitor<Switch, SymbolQualitySignals::SymbolCategory> {
69  public:
70 #define MAP(DeclType, Category) \
71  SymbolQualitySignals::SymbolCategory Visit##DeclType(const DeclType *) { \
72  return SymbolQualitySignals::Category; \
73  }
74  MAP(NamespaceDecl, Namespace);
75  MAP(NamespaceAliasDecl, Namespace);
76  MAP(TypeDecl, Type);
77  MAP(TypeAliasTemplateDecl, Type);
78  MAP(ClassTemplateDecl, Type);
79  MAP(CXXConstructorDecl, Constructor);
80  MAP(CXXDestructorDecl, Destructor);
81  MAP(ValueDecl, Variable);
82  MAP(VarTemplateDecl, Variable);
83  MAP(FunctionDecl, Function);
84  MAP(FunctionTemplateDecl, Function);
85  MAP(Decl, Unknown);
86 #undef MAP
87  };
88  return Switch().Visit(&ND);
89 }
90 
// Categorizes a code-completion result. A result that has a declaration is
// forwarded to the NamedDecl overload; otherwise the result kind is compared
// against RK_Macro and finally the cursor kind is switched on.
// NOTE(review): the return-type line (listing line 91) and the statements on
// listing lines 96, 101, 103, 105, 107, 109, 111 and 113 are absent from this
// copy, so the category chosen for each case cannot be read from here.
92 categorize(const CodeCompletionResult &R) {
93  if (R.Declaration)
94  return categorize(*R.Declaration);
95  if (R.Kind == CodeCompletionResult::RK_Macro)
97  // Everything else is a keyword or a pattern. Patterns are mostly keywords
98  // too, except a few which we recognize by cursor kind.
99  switch (R.CursorKind) {
100  case CXCursor_CXXMethod:
102  case CXCursor_ModuleImportDecl:
104  case CXCursor_MacroDefinition:
106  case CXCursor_TypeRef:
108  case CXCursor_MemberRef:
110  case CXCursor_Constructor:
112  default:
114  }
115 }
116 
// Switch over the index symbol kind D.Kind. Consecutive case labels form
// groups; control reaching the end of the switch hits llvm_unreachable.
// NOTE(review): the function header (listing lines 117-118) and the statement
// that closes each group of labels are absent from this copy, so the category
// assigned to each group cannot be read from here.
119  switch (D.Kind) {
120  case index::SymbolKind::Namespace:
121  case index::SymbolKind::NamespaceAlias:
123  case index::SymbolKind::Macro:
125  case index::SymbolKind::Enum:
126  case index::SymbolKind::Struct:
127  case index::SymbolKind::Class:
128  case index::SymbolKind::Protocol:
129  case index::SymbolKind::Extension:
130  case index::SymbolKind::Union:
131  case index::SymbolKind::TypeAlias:
133  case index::SymbolKind::Function:
134  case index::SymbolKind::ClassMethod:
135  case index::SymbolKind::InstanceMethod:
136  case index::SymbolKind::StaticMethod:
137  case index::SymbolKind::InstanceProperty:
138  case index::SymbolKind::ClassProperty:
139  case index::SymbolKind::StaticProperty:
140  case index::SymbolKind::ConversionFunction:
142  case index::SymbolKind::Destructor:
144  case index::SymbolKind::Constructor:
146  case index::SymbolKind::Variable:
147  case index::SymbolKind::Field:
148  case index::SymbolKind::EnumConstant:
149  case index::SymbolKind::Parameter:
151  case index::SymbolKind::Using:
152  case index::SymbolKind::Module:
155  }
156  llvm_unreachable("Unknown index::SymbolKind");
157 }
158 
159 static bool isInstanceMember(const NamedDecl *ND) {
160  if (!ND)
161  return false;
162  if (const auto *TP = dyn_cast<FunctionTemplateDecl>(ND))
163  ND = TP->TemplateDecl::getTemplatedDecl();
164  if (const auto *CM = dyn_cast<CXXMethodDecl>(ND))
165  return !CM->isStatic();
166  return isa<FieldDecl>(ND); // Note that static fields are VarDecl.
167 }
168 
169 static bool isInstanceMember(const index::SymbolInfo &D) {
170  switch (D.Kind) {
171  case index::SymbolKind::InstanceMethod:
172  case index::SymbolKind::InstanceProperty:
173  case index::SymbolKind::Field:
174  return true;
175  default:
176  return false;
177  }
178 }
179 
180 void SymbolQualitySignals::merge(const CodeCompletionResult &SemaCCResult) {
181  Deprecated |= (SemaCCResult.Availability == CXAvailability_Deprecated);
182  Category = categorize(SemaCCResult);
183 
184  if (SemaCCResult.Declaration) {
185  ImplementationDetail |= isImplementationDetail(SemaCCResult.Declaration);
186  if (auto *ID = SemaCCResult.Declaration->getIdentifier())
187  ReservedName = ReservedName || isReserved(ID->getName());
188  } else if (SemaCCResult.Kind == CodeCompletionResult::RK_Macro)
189  ReservedName = ReservedName || isReserved(SemaCCResult.Macro->getName());
190 }
191 
// Body that folds an index result (IndexResult) into the quality signals:
// the Deprecated and ImplementationDetail flags are OR-ed in, References
// keeps the larger of the two counts, Category is overwritten from the
// symbol info, and ReservedName is switched on if the name is reserved.
// NOTE(review): the function header (listing line 192) is absent from this
// copy of the file.
193  Deprecated |= (IndexResult.Flags & Symbol::Deprecated);
194  ImplementationDetail |= (IndexResult.Flags & Symbol::ImplementationDetail);
195  References = std::max(IndexResult.References, References);
196  Category = categorize(IndexResult.SymInfo);
197  ReservedName = ReservedName || isReserved(IndexResult.Name);
198 }
199 
// Body that computes the quality score as a product of factors: a
// reference-count boost (only for References >= 10), penalties for
// deprecated / reserved-name / implementation-detail symbols, and a
// per-category multiplier.
// NOTE(review): the function header (listing line 200) is absent from this
// copy of the file.
201  float Score = 1;
202 
203  // This avoids a sharp gradient for tail symbols, and also neatly avoids the
204  // question of whether 0 references means a bad symbol or missing data.
205  if (References >= 10) {
206  // Use a sigmoid style boosting function, which flattens out nicely for
207  // large numbers (e.g. 2.94 for 1M references).
208  // The following boosting function is equivalent to:
209  // m = 0.06
210  // f = 12.0
211  // boost = f * sigmoid(m * std::log(References)) - 0.5 * f + 0.59
212  // Sample data points: (10, 1.00), (100, 1.41), (1000, 1.82),
213  // (10K, 2.21), (100K, 2.58), (1M, 2.94)
214  float S = std::pow(References, -0.06);
215  Score *= 6.0 * (1 - S) / (1 + S) + 0.59;
216  }
217 
218  if (Deprecated)
219  Score *= 0.1f;
220  if (ReservedName)
221  Score *= 0.1f;
222  if (ImplementationDetail)
223  Score *= 0.2f;
224 
225  switch (Category) {
226  case Keyword: // Often relevant, but misses most signals.
227  Score *= 4; // FIXME: important keywords should have specific boosts.
228  break;
229  case Type:
230  case Function:
231  case Variable:
232  Score *= 1.1f;
233  break;
234  case Namespace:
235  Score *= 0.8f;
236  break;
237  case Macro:
238  case Destructor:
239  case Operator:
240  Score *= 0.5f;
241  break;
242  case Constructor: // No boost constructors so they are after class types.
243  case Unknown:
244  break;
245  }
246 
247  return Score;
248 }
249 
250 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
251  const SymbolQualitySignals &S) {
252  OS << llvm::formatv("=== Symbol quality: {0}\n", S.evaluate());
253  OS << llvm::formatv("\tReferences: {0}\n", S.References);
254  OS << llvm::formatv("\tDeprecated: {0}\n", S.Deprecated);
255  OS << llvm::formatv("\tReserved name: {0}\n", S.ReservedName);
256  OS << llvm::formatv("\tCategory: {0}\n", static_cast<int>(S.Category));
257  return OS;
258 }
259 
// Determines the accessible scope of declaration D by walking outward from
// its declaration context until a file context is reached, remembering
// whether a record context was passed on the way.
// NOTE(review): the return-type line (listing line 260) and the statements on
// listing lines 273, 277 and 280-281 are absent from this copy, so the value
// returned on each path cannot be read from here.
// NOTE(review): D is dereferenced on listing line 263 before the
// dyn_cast_or_null on line 264, so a null D would already have failed there.
261 computeScope(const NamedDecl *D) {
262  // Injected "Foo" within the class "Foo" has file scope, not class scope.
263  const DeclContext *DC = D->getDeclContext();
264  if (auto *R = dyn_cast_or_null<RecordDecl>(D))
265  if (R->isInjectedClassName())
266  DC = DC->getParent();
267  // Class constructor should have the same scope as the class.
268  if (isa<CXXConstructorDecl>(D))
269  DC = DC->getParent();
270  bool InClass = false;
271  for (; !DC->isFileContext(); DC = DC->getParent()) {
272  if (DC->isFunctionOrMethod())
274  InClass = InClass || DC->isRecord();
275  }
276  if (InClass)
278  // This threshold could be tweaked, e.g. to treat module-visible as global.
279  if (D->getLinkageInternal() < ExternalLinkage)
282 }
283 
// Body that folds an index result (IndexResult) into the relevance signals:
// records the canonical declaration's file URI and the symbol's scope, ORs in
// whether the symbol is an instance member, and narrows Scope to FileScope
// when the VisibleOutsideFile flag is not set.
// NOTE(review): the function header (listing line 284) is absent from this
// copy of the file.
285  SymbolURI = IndexResult.CanonicalDeclaration.FileURI;
286  SymbolScope = IndexResult.Scope;
287  IsInstanceMember |= isInstanceMember(IndexResult.SymInfo);
288  if (!(IndexResult.Flags & Symbol::VisibleOutsideFile)) {
289  Scope = AccessibleScope::FileScope;
290  }
291 }
292 
293 void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
294  if (SemaCCResult.Availability == CXAvailability_NotAvailable ||
295  SemaCCResult.Availability == CXAvailability_NotAccessible)
296  Forbidden = true;
297 
298  if (SemaCCResult.Declaration) {
299  SemaSaysInScope = true;
300  // We boost things that have decls in the main file. We give a fixed score
301  // for all other declarations in sema as they are already included in the
302  // translation unit.
303  float DeclProximity = (hasDeclInMainFile(*SemaCCResult.Declaration) ||
304  hasUsingDeclInMainFile(SemaCCResult))
305  ? 1.0
306  : 0.6;
307  SemaFileProximityScore = std::max(DeclProximity, SemaFileProximityScore);
308  IsInstanceMember |= isInstanceMember(SemaCCResult.Declaration);
309  InBaseClass |= SemaCCResult.InBaseClass;
310  }
311 
312  // Declarations are scoped, others (like macros) are assumed global.
313  if (SemaCCResult.Declaration)
314  Scope = std::min(Scope, computeScope(SemaCCResult.Declaration));
315 
316  NeedsFixIts = !SemaCCResult.FixIts.empty();
317 }
318 
319 static std::pair<float, unsigned> uriProximity(llvm::StringRef SymbolURI,
320  URIDistance *D) {
321  if (!D || SymbolURI.empty())
322  return {0.f, 0u};
323  unsigned Distance = D->distance(SymbolURI);
324  // Assume approximately default options are used for sensible scoring.
325  return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance};
326 }
327 
328 static float scopeBoost(ScopeDistance &Distance,
329  llvm::Optional<llvm::StringRef> SymbolScope) {
330  if (!SymbolScope)
331  return 1;
332  auto D = Distance.distance(*SymbolScope);
333  if (D == FileDistance::Unreachable)
334  return 0.6f;
335  return std::max(0.65, 2.0 * std::pow(0.6, D / 2.0));
336 }
337 
338 static llvm::Optional<llvm::StringRef>
339 wordMatching(llvm::StringRef Name, const llvm::StringSet<> *ContextWords) {
340  if (ContextWords)
341  for (const auto& Word : ContextWords->keys())
342  if (Name.contains_lower(Word))
343  return Word;
344  return llvm::None;
345 }
346 
// Body that computes the relevance score. It returns 0 immediately when the
// symbol is Forbidden; otherwise it starts from 1 and multiplies in one
// factor per signal: name match, file proximity, scope proximity, context
// word match, accessible scope (weighted differently for code completion
// and other queries), preferred-type match, non-instance member accessed via
// '.'/'->', base-class membership and required fix-its.
// NOTE(review): the function header (listing line 347) is absent from this
// copy of the file.
348  float Score = 1;
349 
350  if (Forbidden)
351  return 0;
352 
353  Score *= NameMatch;
354 
355  // File proximity scores are [0,1] and we translate them into a multiplier in
356  // the range from 1 to 3.
357  Score *= 1 + 2 * std::max(uriProximity(SymbolURI, FileProximityMatch).first,
358  SemaFileProximityScore);
359 
360  if (ScopeProximityMatch)
361  // Use a constant scope boost for sema results, as scopes of sema results
362  // can be tricky (e.g. class/function scope). Set to the max boost as we
363  // don't load top-level symbols from the preamble and sema results are
364  // always in the accessible scope.
365  Score *=
366  SemaSaysInScope ? 2.0 : scopeBoost(*ScopeProximityMatch, SymbolScope);
367 
368  if (wordMatching(Name, ContextWords))
369  Score *= 1.5;
370 
371  // Symbols like local variables may only be referenced within their scope.
372  // Conversely if we're in that scope, it's likely we'll reference them.
373  if (Query == CodeComplete) {
374  // The narrower the scope where a symbol is visible, the more likely it is
375  // to be relevant when it is available.
376  switch (Scope) {
377  case GlobalScope:
378  break;
379  case FileScope:
380  Score *= 1.5f;
381  break;
382  case ClassScope:
383  Score *= 2;
384  break;
385  case FunctionScope:
386  Score *= 4;
387  break;
388  }
389  } else {
390  // For non-completion queries, the wider the scope where a symbol is
391  // visible, the more likely it is to be relevant.
392  switch (Scope) {
393  case GlobalScope:
394  break;
395  case FileScope:
396  Score *= 0.5f;
397  break;
398  default:
399  // TODO: Handle other scopes as we start to use them for index results.
400  break;
401  }
402  }
403 
404  if (TypeMatchesPreferred)
405  Score *= 5.0;
406 
407  // Penalize non-instance members when they are accessed via a class instance.
408  if (!IsInstanceMember &&
409  (Context == CodeCompletionContext::CCC_DotMemberAccess ||
410  Context == CodeCompletionContext::CCC_ArrowMemberAccess)) {
411  Score *= 0.2f;
412  }
413 
414  if (InBaseClass)
415  Score *= 0.5f;
416 
417  // Penalize for FixIts.
418  if (NeedsFixIts)
419  Score *= 0.5f;
420 
421  return Score;
422 }
423 
// Writes a human-readable dump of the relevance signals S to OS: the
// evaluated score first, then one tab-indented line per signal. Enum-valued
// signals (Query, Scope) are printed as integers. Returns OS.
// NOTE(review): listing lines 445, 454 and 458 are absent from this copy, so
// the declaration of `Score` and the arguments of the last two formatv calls
// cannot be read from here.
// NOTE(review): the "Type matched preferred" format string opens a '(' that
// it never closes — confirm whether that is intended.
424 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
425  const SymbolRelevanceSignals &S) {
426  OS << llvm::formatv("=== Symbol relevance: {0}\n", S.evaluate());
427  OS << llvm::formatv("\tName: {0}\n", S.Name);
428  OS << llvm::formatv("\tName match: {0}\n", S.NameMatch);
429  if (S.ContextWords)
430  OS << llvm::formatv(
431  "\tMatching context word: {0}\n",
432  wordMatching(S.Name, S.ContextWords).getValueOr("<none>"));
433  OS << llvm::formatv("\tForbidden: {0}\n", S.Forbidden);
434  OS << llvm::formatv("\tNeedsFixIts: {0}\n", S.NeedsFixIts);
435  OS << llvm::formatv("\tIsInstanceMember: {0}\n", S.IsInstanceMember);
436  OS << llvm::formatv("\tContext: {0}\n", getCompletionKindString(S.Context));
437  OS << llvm::formatv("\tQuery type: {0}\n", static_cast<int>(S.Query));
438  OS << llvm::formatv("\tScope: {0}\n", static_cast<int>(S.Scope));
439 
440  OS << llvm::formatv("\tSymbol URI: {0}\n", S.SymbolURI);
441  OS << llvm::formatv("\tSymbol scope: {0}\n",
442  S.SymbolScope ? *S.SymbolScope : "<None>");
443 
444  if (S.FileProximityMatch) {
446  OS << llvm::formatv("\tIndex URI proximity: {0} (distance={1})\n",
447  Score.first, Score.second);
448  }
449  OS << llvm::formatv("\tSema file proximity: {0}\n", S.SemaFileProximityScore);
450 
451  OS << llvm::formatv("\tSema says in scope: {0}\n", S.SemaSaysInScope);
452  if (S.ScopeProximityMatch)
453  OS << llvm::formatv("\tIndex scope boost: {0}\n",
455 
456  OS << llvm::formatv(
457  "\tType matched preferred: {0} (Context type: {1}, Symbol type: {2}\n",
459 
460  return OS;
461 }
462 
/// Combines a symbol's quality score and its relevance score into a single
/// value by multiplying them.
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance) {
  const float Combined = SymbolQuality * SymbolRelevance;
  return Combined;
}
466 
467 // Produces an integer that sorts in the same order as F.
468 // That is: a < b <==> encodeFloat(a) < encodeFloat(b).
469 static uint32_t encodeFloat(float F) {
470  static_assert(std::numeric_limits<float>::is_iec559, "");
471  constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1);
472 
473  // Get the bits of the float. Endianness is the same as for integers.
474  uint32_t U = llvm::FloatToBits(F);
475  // IEEE 754 floats compare like sign-magnitude integers.
476  if (U & TopBit) // Negative float.
477  return 0 - U; // Map onto the low half of integers, order reversed.
478  return U + TopBit; // Positive floats map onto the high half of integers.
479 }
480 
481 std::string sortText(float Score, llvm::StringRef Name) {
482  // We convert -Score to an integer, and hex-encode for readability.
483  // Example: [0.5, "foo"] -> "41000000foo"
484  std::string S;
485  llvm::raw_string_ostream OS(S);
486  llvm::write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower,
487  /*Width=*/2 * sizeof(Score));
488  OS << Name;
489  OS.flush();
490  return S;
491 }
492 
// Writes a human-readable dump of the signature quality signals S to OS, one
// tab-indented line per signal, and returns OS.
// NOTE(review): listing line 498 (the argument and closing of the "optional
// parameters" formatv call) is absent from this copy of the file.
493 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
494  const SignatureQualitySignals &S) {
495  OS << llvm::formatv("=== Signature Quality:\n");
496  OS << llvm::formatv("\tNumber of parameters: {0}\n", S.NumberOfParameters);
497  OS << llvm::formatv("\tNumber of optional parameters: {0}\n",
499  OS << llvm::formatv("\tKind: {0}\n", S.Kind);
500  return OS;
501 }
502 
503 } // namespace clangd
504 } // namespace clang
const FunctionDecl * Decl
static SymbolQualitySignals::SymbolCategory categorize(const NamedDecl &ND)
Definition: Quality.cpp:62
void merge(const CodeCompletionResult &SemaCCResult)
Definition: Quality.cpp:180
static float scopeBoost(ScopeDistance &Distance, llvm::Optional< llvm::StringRef > SymbolScope)
Definition: Quality.cpp:328
unsigned distance(llvm::StringRef SymbolScope)
llvm::APSInt Lower
llvm::Optional< llvm::StringRef > SymbolScope
Definition: Quality.h:110
static uint32_t encodeFloat(float F)
Definition: Quality.cpp:469
enum clang::clangd::SymbolQualitySignals::SymbolCategory Category
void merge(const CodeCompletionResult &SemaResult)
Definition: Quality.cpp:293
bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM)
Returns true iff Loc is inside the main file.
Definition: SourceCode.cpp:534
std::string sortText(float Score, llvm::StringRef Name)
Returns a string that sorts in the same order as (-Score, Tiebreak), for LSP.
Definition: Quality.cpp:481
Symbol is visible to other files (not e.g. a static helper function).
Definition: Symbol.h:125
static constexpr unsigned Unreachable
Definition: FileDistance.h:74
static bool isReserved(llvm::StringRef Name)
Definition: Quality.cpp:37
CodeCompletionContext::Kind Context
Definition: Quality.h:127
llvm::StringRef Scope
The containing namespace. e.g. "" (global), "ns::" (top-level namespace).
Definition: Symbol.h:44
Documents should not be synced at all.
Attributes of a symbol that affect how much we like it.
Definition: Quality.h:57
llvm::StringRef Name
The name of the symbol (for ContextWords). Must be explicitly assigned.
Definition: Quality.h:89
enum clang::clangd::SymbolRelevanceSignals::AccessibleScope Scope
CodeCompleteConsumer::OverloadCandidate::CandidateKind Kind
Definition: Quality.h:198
unsigned References
The number of translation units that reference this symbol from their main file.
Definition: Symbol.h:59
index::SymbolInfo SymInfo
The symbol information, like symbol kind.
Definition: Symbol.h:40
llvm::Optional< float > Score
Symbol is an implementation detail.
Definition: Symbol.h:123
bool NeedsFixIts
Whether fixits needs to be applied for that completion or not.
Definition: Quality.h:96
llvm::StringSet * ContextWords
Lowercase words relevant to the context (e.g. near the completion point).
Definition: Quality.h:93
enum clang::clangd::SymbolRelevanceSignals::QueryType Query
clang::find_all_symbols::SymbolInfo SymbolInfo
#define MAP(DeclType, Category)
SymbolFlag Flags
Definition: Symbol.h:128
*that are placed right before the argument **code *void f(bool foo)
Checks that argument comments match parameter names.
static constexpr llvm::StringLiteral Name
SymbolLocation CanonicalDeclaration
The location of the preferred declaration of the symbol.
Definition: Symbol.h:56
const Symbol * IndexResult
A context is an immutable container for per-request data that must be propagated through layers that ...
Definition: Context.h:69
static bool hasDeclInMainFile(const Decl &D)
Definition: Quality.cpp:43
unsigned distance(llvm::StringRef URI)
llvm::StringRef SymbolURI
These are used to calculate proximity between the index symbol and the query.
Definition: Quality.h:102
static llvm::Optional< llvm::StringRef > wordMatching(llvm::StringRef Name, const llvm::StringSet<> *ContextWords)
Definition: Quality.cpp:339
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance)
Combine symbol quality and relevance into a single score.
Definition: Quality.cpp:463
static bool isInstanceMember(const NamedDecl *ND)
Definition: Quality.cpp:159
The class presents a C++ symbol, e.g.
Definition: Symbol.h:36
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
unsigned References
Support lookups like FileDistance, but the lookup keys are symbol scopes.
Definition: FileDistance.h:117
llvm::StringRef Name
The unqualified name of the symbol, e.g. "bar" (for ns::bar).
Definition: Symbol.h:42
float SemaFileProximityScore
FIXME: unify with index proximity score - signals should be source-independent.
Definition: Quality.h:106
static SymbolRelevanceSignals::AccessibleScope computeScope(const NamedDecl *D)
Definition: Quality.cpp:261
static std::pair< float, unsigned > uriProximity(llvm::StringRef SymbolURI, URIDistance *D)
Definition: Quality.cpp:319
bool isImplementationDetail(const Decl *D)
Returns true if the declaration is considered implementation detail based on heuristics.
Definition: AST.cpp:157
float NameMatch
0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
Definition: Quality.h:91
CaptureMode CM
Indicates if the symbol is deprecated.
Definition: Symbol.h:121
llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const CodeCompletion &C)
Attributes of a symbol-query pair that affect how much we like it.
Definition: Quality.h:87
std::string Word
NodeType Type
static bool hasUsingDeclInMainFile(const CodeCompletionResult &R)
Definition: Quality.cpp:52