clang-tools  7.0.0
Quality.cpp
Go to the documentation of this file.
1 //===--- Quality.cpp --------------------------------------------*- C++-*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===---------------------------------------------------------------------===//
9 #include "Quality.h"
10 #include "FileDistance.h"
11 #include "URI.h"
12 #include "index/Index.h"
13 #include "clang/AST/ASTContext.h"
14 #include "clang/AST/Decl.h"
15 #include "clang/AST/DeclCXX.h"
16 #include "clang/AST/DeclTemplate.h"
17 #include "clang/AST/DeclVisitor.h"
18 #include "clang/Basic/CharInfo.h"
19 #include "clang/Basic/SourceManager.h"
20 #include "clang/Sema/CodeCompleteConsumer.h"
21 #include "llvm/Support/Casting.h"
22 #include "llvm/Support/FormatVariadic.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <cmath>
26 
27 namespace clang {
28 namespace clangd {
29 using namespace llvm;
30 static bool isReserved(StringRef Name) {
31  // FIXME: Should we exclude _Bool and others recognized by the standard?
32  return Name.size() >= 2 && Name[0] == '_' &&
33  (isUppercase(Name[1]) || Name[1] == '_');
34 }
35 
36 static bool hasDeclInMainFile(const Decl &D) {
37  auto &SourceMgr = D.getASTContext().getSourceManager();
38  for (auto *Redecl : D.redecls()) {
39  auto Loc = SourceMgr.getSpellingLoc(Redecl->getLocation());
40  if (SourceMgr.isWrittenInMainFile(Loc))
41  return true;
42  }
43  return false;
44 }
45 
46 static bool hasUsingDeclInMainFile(const CodeCompletionResult &R) {
47  const auto &Context = R.Declaration->getASTContext();
48  const auto &SourceMgr = Context.getSourceManager();
49  if (R.ShadowDecl) {
50  const auto Loc = SourceMgr.getExpansionLoc(R.ShadowDecl->getLocation());
51  if (SourceMgr.isWrittenInMainFile(Loc))
52  return true;
53  }
54  return false;
55 }
56 
57 static SymbolQualitySignals::SymbolCategory categorize(const NamedDecl &ND) {
58  class Switch
59  : public ConstDeclVisitor<Switch, SymbolQualitySignals::SymbolCategory> {
60  public:
61 #define MAP(DeclType, Category) \
62  SymbolQualitySignals::SymbolCategory Visit##DeclType(const DeclType *) { \
63  return SymbolQualitySignals::Category; \
64  }
65  MAP(NamespaceDecl, Namespace);
66  MAP(NamespaceAliasDecl, Namespace);
67  MAP(TypeDecl, Type);
68  MAP(TypeAliasTemplateDecl, Type);
69  MAP(ClassTemplateDecl, Type);
70  MAP(CXXConstructorDecl, Constructor);
71  MAP(ValueDecl, Variable);
72  MAP(VarTemplateDecl, Variable);
73  MAP(FunctionDecl, Function);
74  MAP(FunctionTemplateDecl, Function);
75  MAP(Decl, Unknown);
76 #undef MAP
77  };
78  return Switch().Visit(&ND);
79 }
80 
82 categorize(const CodeCompletionResult &R) {
83  if (R.Declaration)
84  return categorize(*R.Declaration);
85  if (R.Kind == CodeCompletionResult::RK_Macro)
87  // Everything else is a keyword or a pattern. Patterns are mostly keywords
88  // too, except a few which we recognize by cursor kind.
89  switch (R.CursorKind) {
90  case CXCursor_CXXMethod:
92  case CXCursor_ModuleImportDecl:
94  case CXCursor_MacroDefinition:
96  case CXCursor_TypeRef:
98  case CXCursor_MemberRef:
100  case CXCursor_Constructor:
102  default:
104  }
105 }
106 
109  switch (D.Kind) {
110  case index::SymbolKind::Namespace:
111  case index::SymbolKind::NamespaceAlias:
113  case index::SymbolKind::Macro:
115  case index::SymbolKind::Enum:
116  case index::SymbolKind::Struct:
117  case index::SymbolKind::Class:
118  case index::SymbolKind::Protocol:
119  case index::SymbolKind::Extension:
120  case index::SymbolKind::Union:
121  case index::SymbolKind::TypeAlias:
123  case index::SymbolKind::Function:
124  case index::SymbolKind::ClassMethod:
125  case index::SymbolKind::InstanceMethod:
126  case index::SymbolKind::StaticMethod:
127  case index::SymbolKind::InstanceProperty:
128  case index::SymbolKind::ClassProperty:
129  case index::SymbolKind::StaticProperty:
130  case index::SymbolKind::Destructor:
131  case index::SymbolKind::ConversionFunction:
133  case index::SymbolKind::Constructor:
135  case index::SymbolKind::Variable:
136  case index::SymbolKind::Field:
137  case index::SymbolKind::EnumConstant:
138  case index::SymbolKind::Parameter:
140  case index::SymbolKind::Using:
141  case index::SymbolKind::Module:
144  }
145  llvm_unreachable("Unknown index::SymbolKind");
146 }
147 
148 static bool isInstanceMember(const NamedDecl *ND) {
149  if (!ND)
150  return false;
151  if (const auto *TP = dyn_cast<FunctionTemplateDecl>(ND))
152  ND = TP->TemplateDecl::getTemplatedDecl();
153  if (const auto *CM = dyn_cast<CXXMethodDecl>(ND))
154  return !CM->isStatic();
155  return isa<FieldDecl>(ND); // Note that static fields are VarDecl.
156 }
157 
158 static bool isInstanceMember(const index::SymbolInfo &D) {
159  switch (D.Kind) {
160  case index::SymbolKind::InstanceMethod:
161  case index::SymbolKind::InstanceProperty:
162  case index::SymbolKind::Field:
163  return true;
164  default:
165  return false;
166  }
167 }
168 
169 void SymbolQualitySignals::merge(const CodeCompletionResult &SemaCCResult) {
170  if (SemaCCResult.Availability == CXAvailability_Deprecated)
171  Deprecated = true;
172 
173  Category = categorize(SemaCCResult);
174 
175  if (SemaCCResult.Declaration) {
176  if (auto *ID = SemaCCResult.Declaration->getIdentifier())
177  ReservedName = ReservedName || isReserved(ID->getName());
178  } else if (SemaCCResult.Kind == CodeCompletionResult::RK_Macro)
179  ReservedName = ReservedName || isReserved(SemaCCResult.Macro->getName());
180 }
181 
183  References = std::max(IndexResult.References, References);
184  Category = categorize(IndexResult.SymInfo);
185  ReservedName = ReservedName || isReserved(IndexResult.Name);
186 }
187 
189  float Score = 1;
190 
191  // This avoids a sharp gradient for tail symbols, and also neatly avoids the
192  // question of whether 0 references means a bad symbol or missing data.
193  if (References >= 10) {
194  // Use a sigmoid style boosting function, which flattens out nicely for large
195  // numbers (e.g. 2.94 for 1M references).
196  // The following boosting function is equivalent to:
197  // m = 0.06
198  // f = 12.0
199  // boost = f * sigmoid(m * std::log(References)) - 0.5 * f + 0.59
200  // Sample data points: (10, 1.00), (100, 1.41), (1000, 1.82),
201  // (10K, 2.21), (100K, 2.58), (1M, 2.94)
202  float S = std::pow(References, -0.06);
203  Score *= 6.0 * (1 - S) / (1 + S) + 0.59;
204  }
205 
206  if (Deprecated)
207  Score *= 0.1f;
208  if (ReservedName)
209  Score *= 0.1f;
210 
211  switch (Category) {
212  case Keyword: // Often relevant, but misses most signals.
213  Score *= 4; // FIXME: important keywords should have specific boosts.
214  break;
215  case Type:
216  case Function:
217  case Variable:
218  Score *= 1.1f;
219  break;
220  case Namespace:
221  Score *= 0.8f;
222  break;
223  case Macro:
224  Score *= 0.2f;
225  break;
226  case Unknown:
227  case Constructor: // No boost constructors so they are after class types.
228  break;
229  }
230 
231  return Score;
232 }
233 
234 raw_ostream &operator<<(raw_ostream &OS, const SymbolQualitySignals &S) {
235  OS << formatv("=== Symbol quality: {0}\n", S.evaluate());
236  OS << formatv("\tReferences: {0}\n", S.References);
237  OS << formatv("\tDeprecated: {0}\n", S.Deprecated);
238  OS << formatv("\tReserved name: {0}\n", S.ReservedName);
239  OS << formatv("\tCategory: {0}\n", static_cast<int>(S.Category));
240  return OS;
241 }
242 
244 computeScope(const NamedDecl *D) {
245  // Injected "Foo" within the class "Foo" has file scope, not class scope.
246  const DeclContext *DC = D->getDeclContext();
247  if (auto *R = dyn_cast_or_null<RecordDecl>(D))
248  if (R->isInjectedClassName())
249  DC = DC->getParent();
250  // Class constructor should have the same scope as the class.
251  if (isa<CXXConstructorDecl>(D))
252  DC = DC->getParent();
253  bool InClass = false;
254  for (; !DC->isFileContext(); DC = DC->getParent()) {
255  if (DC->isFunctionOrMethod())
257  InClass = InClass || DC->isRecord();
258  }
259  if (InClass)
261  // This threshold could be tweaked, e.g. to treat module-visible as global.
262  if (D->getLinkageInternal() < ExternalLinkage)
265 }
266 
268  // FIXME: Index results always assumed to be at global scope. If Scope becomes
269  // relevant to non-completion requests, we should recognize class members etc.
270 
271  SymbolURI = IndexResult.CanonicalDeclaration.FileURI;
272  IsInstanceMember |= isInstanceMember(IndexResult.SymInfo);
273 }
274 
275 void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
276  if (SemaCCResult.Availability == CXAvailability_NotAvailable ||
277  SemaCCResult.Availability == CXAvailability_NotAccessible)
278  Forbidden = true;
279 
280  if (SemaCCResult.Declaration) {
281  // We boost things that have decls in the main file. We give a fixed score
282  // for all other declarations in sema as they are already included in the
283  // translation unit.
284  float DeclProximity = (hasDeclInMainFile(*SemaCCResult.Declaration) ||
285  hasUsingDeclInMainFile(SemaCCResult))
286  ? 1.0
287  : 0.6;
288  SemaProximityScore = std::max(DeclProximity, SemaProximityScore);
289  IsInstanceMember |= isInstanceMember(SemaCCResult.Declaration);
290  }
291 
292  // Declarations are scoped, others (like macros) are assumed global.
293  if (SemaCCResult.Declaration)
294  Scope = std::min(Scope, computeScope(SemaCCResult.Declaration));
295 }
296 
297 static std::pair<float, unsigned> proximityScore(llvm::StringRef SymbolURI,
298  URIDistance *D) {
299  if (!D || SymbolURI.empty())
300  return {0.f, 0u};
301  unsigned Distance = D->distance(SymbolURI);
302  // Assume approximately default options are used for sensible scoring.
303  return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance};
304 }
305 
307  float Score = 1;
308 
309  if (Forbidden)
310  return 0;
311 
312  Score *= NameMatch;
313 
314  // Proximity scores are [0,1] and we translate them into a multiplier in the
315  // range from 1 to 3.
316  Score *= 1 + 2 * std::max(proximityScore(SymbolURI, FileProximityMatch).first,
317  SemaProximityScore);
318 
319  // Symbols like local variables may only be referenced within their scope.
320  // Conversely if we're in that scope, it's likely we'll reference them.
321  if (Query == CodeComplete) {
322  // The narrower the scope where a symbol is visible, the more likely it is
323  // to be relevant when it is available.
324  switch (Scope) {
325  case GlobalScope:
326  break;
327  case FileScope:
328  Score *= 1.5;
329  break;
330  case ClassScope:
331  Score *= 2;
332  break;
333  case FunctionScope:
334  Score *= 4;
335  break;
336  }
337  }
338 
339  // Penalize non-instance members when they are accessed via a class instance.
340  if (!IsInstanceMember &&
341  (Context == CodeCompletionContext::CCC_DotMemberAccess ||
342  Context == CodeCompletionContext::CCC_ArrowMemberAccess)) {
343  Score *= 0.5;
344  }
345 
346  return Score;
347 }
348 
349 raw_ostream &operator<<(raw_ostream &OS, const SymbolRelevanceSignals &S) {
350  OS << formatv("=== Symbol relevance: {0}\n", S.evaluate());
351  OS << formatv("\tName match: {0}\n", S.NameMatch);
352  OS << formatv("\tForbidden: {0}\n", S.Forbidden);
353  OS << formatv("\tIsInstanceMember: {0}\n", S.IsInstanceMember);
354  OS << formatv("\tContext: {0}\n", getCompletionKindString(S.Context));
355  OS << formatv("\tSymbol URI: {0}\n", S.SymbolURI);
356  if (S.FileProximityMatch) {
357  auto Score = proximityScore(S.SymbolURI, S.FileProximityMatch);
358  OS << formatv("\tIndex proximity: {0} (distance={1})\n", Score.first,
359  Score.second);
360  }
361  OS << formatv("\tSema proximity: {0}\n", S.SemaProximityScore);
362  OS << formatv("\tQuery type: {0}\n", static_cast<int>(S.Query));
363  OS << formatv("\tScope: {0}\n", static_cast<int>(S.Scope));
364  return OS;
365 }
366 
// Combines a symbol's quality score and its relevance score into a single
// ranking score, which is simply their product.
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance) {
  const float Combined = SymbolQuality * SymbolRelevance;
  return Combined;
}
370 
371 // Produces an integer that sorts in the same order as F.
372 // That is: a < b <==> encodeFloat(a) < encodeFloat(b).
373 static uint32_t encodeFloat(float F) {
374  static_assert(std::numeric_limits<float>::is_iec559, "");
375  constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1);
376 
377  // Get the bits of the float. Endianness is the same as for integers.
378  uint32_t U = FloatToBits(F);
379  // IEEE 754 floats compare like sign-magnitude integers.
380  if (U & TopBit) // Negative float.
381  return 0 - U; // Map onto the low half of integers, order reversed.
382  return U + TopBit; // Positive floats map onto the high half of integers.
383 }
384 
385 std::string sortText(float Score, llvm::StringRef Name) {
386  // We convert -Score to an integer, and hex-encode for readability.
387  // Example: [0.5, "foo"] -> "41000000foo"
388  std::string S;
389  llvm::raw_string_ostream OS(S);
390  write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower,
391  /*Width=*/2 * sizeof(Score));
392  OS << Name;
393  OS.flush();
394  return S;
395 }
396 
397 } // namespace clangd
398 } // namespace clang
SourceLocation Loc
'#' location in the include directive
llvm::StringRef Name
static SymbolQualitySignals::SymbolCategory categorize(const NamedDecl &ND)
Definition: Quality.cpp:57
void merge(const CodeCompletionResult &SemaCCResult)
Definition: Quality.cpp:169
Some operations such as code completion produce a set of candidates.
static uint32_t encodeFloat(float F)
Definition: Quality.cpp:373
enum clang::clangd::SymbolQualitySignals::SymbolCategory Category
void merge(const CodeCompletionResult &SemaResult)
Definition: Quality.cpp:275
std::string sortText(float Score, llvm::StringRef Name)
Returns a string that sorts in the same order as (-Score, Tiebreak), for LSP.
Definition: Quality.cpp:385
CodeCompletionContext::Kind Context
Definition: Quality.h:103
Attributes of a symbol that affect how much we like it.
Definition: Quality.h:49
float SemaProximityScore
Proximity between best declaration and the query.
Definition: Quality.h:88
enum clang::clangd::SymbolRelevanceSignals::AccessibleScope Scope
unsigned References
Definition: Index.h:187
index::SymbolInfo SymInfo
Definition: Index.h:168
static bool isReserved(StringRef Name)
Definition: Quality.cpp:30
enum clang::clangd::SymbolRelevanceSignals::QueryType Query
clang::find_all_symbols::SymbolInfo SymbolInfo
#define MAP(DeclType, Category)
*that are placed right before the argument **code *void f(bool foo)
Checks that argument comments match parameter names.
llvm::StringRef FileURI
Definition: Index.h:39
SymbolLocation CanonicalDeclaration
Definition: Index.h:184
const Symbol * IndexResult
static std::pair< float, unsigned > proximityScore(llvm::StringRef SymbolURI, URIDistance *D)
Definition: Quality.cpp:297
A context is an immutable container for per-request data that must be propagated through layers that ...
Definition: Context.h:70
static bool hasDeclInMainFile(const Decl &D)
Definition: Quality.cpp:36
unsigned distance(llvm::StringRef URI)
llvm::StringRef SymbolURI
This is used to calculate proximity between the index symbol and the query.
Definition: Quality.h:84
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance)
Combine symbol quality and relevance into a single score.
Definition: Quality.cpp:367
static bool isInstanceMember(const NamedDecl *ND)
Definition: Quality.cpp:148
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
llvm::StringRef Name
Definition: Index.h:170
static SymbolRelevanceSignals::AccessibleScope computeScope(const NamedDecl *D)
Definition: Quality.cpp:244
float NameMatch
0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
Definition: Quality.h:78
Attributes of a symbol-query pair that affect how much we like it.
Definition: Quality.h:76
raw_ostream & operator<<(raw_ostream &OS, const CodeCompletion &C)
static bool hasUsingDeclInMainFile(const CodeCompletionResult &R)
Definition: Quality.cpp:46