clang-tools  7.0.0
FileDistance.h
Go to the documentation of this file.
1 //===--- FileDistance.h - File proximity scoring -----------------*- C++-*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This library measures the distance between file paths.
11 // It's used for ranking symbols, e.g. in code completion.
12 // |foo/bar.h -> foo/bar.h| = 0.
13 // |foo/bar.h -> foo/baz.h| < |foo/bar.h -> baz.h|.
14 // This is an edit-distance, where edits go up or down the directory tree.
15 // It's not symmetrical, the costs of going up and down may not match.
16 //
17 // Dealing with multiple sources:
18 // In practice we care about the distance from a source file, but files near
19 // its main-header and #included files are considered "close".
20 // So we start with a set of (source, cost) pairs, and call the distance to a
21 // path the minimum of `cost + |source -> path|`.
22 //
23 // We allow each source to limit the number of up-traversals paths may start
24 // with. Up-traversals may reach things that are not "semantically near".
25 //
26 // Symbol URI schemes:
27 // Symbol locations may be represented by URIs rather than file paths directly.
28 // In this case we want to perform distance computations in URI space rather
29 // than in file-space, without performing redundant conversions.
30 // Therefore we have a lookup structure that accepts URIs, so that intermediate
31 // calculations for the same scheme can be reused.
32 //
33 // Caveats:
34 // Assuming up and down traversals each have uniform costs is simplistic.
35 // Often there are "semantic roots" whose children are almost unrelated.
36 // (e.g. /usr/include/, or / in an umbrella repository). We ignore this.
37 //
38 //===----------------------------------------------------------------------===//
39 
#include "URI.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/StringSaver.h"
#include <limits>
#include <memory>
#include <utility>
48 
49 namespace clang {
50 namespace clangd {
51 
// Tunable step costs for the path edit-distance.
// The trailing comment on each field shows the single step it prices.
struct FileDistanceOptions {
  unsigned UpCost = 2;      // |foo/bar.h -> foo|
  unsigned DownCost = 1;    // |foo -> foo/bar.h|
  unsigned IncludeCost = 2; // |foo.cc -> included_header.h|
};
57 
// Per-source settings for a distance computation.
struct SourceParams {
  // Base cost for paths starting at this source.
  unsigned Cost = 0;
  // Limits the number of upwards traversals allowed from this source.
  // The default is the largest unsigned value, i.e. effectively no limit.
  unsigned MaxUpTraversals = std::numeric_limits<unsigned>::max();
};
64 
65 // Supports lookups to find the minimum distance to a file from any source.
66 // This object should be reused, it memoizes intermediate computations.
67 class FileDistance {
68 public:
69  static constexpr unsigned kUnreachable = std::numeric_limits<unsigned>::max();
70 
71  FileDistance(llvm::StringMap<SourceParams> Sources,
72  const FileDistanceOptions &Opts = {});
73 
74  // Computes the minimum distance from any source to the file path.
75  unsigned distance(llvm::StringRef Path);
76 
77 private:
78  // Costs computed so far. Always contains sources and their ancestors.
79  // We store hash codes only. Collisions are rare and consequences aren't dire.
80  llvm::DenseMap<llvm::hash_code, unsigned> Cache;
82 };
83 
84 // Supports lookups like FileDistance, but the lookup keys are URIs.
85 // We convert each of the sources to the scheme of the URI and do a FileDistance
86 // comparison on the bodies.
87 class URIDistance {
88 public:
89  URIDistance(llvm::StringMap<SourceParams> Sources,
90  const FileDistanceOptions &Opts = {})
91  : Sources(Sources), Opts(Opts) {}
92 
93  // Computes the minimum distance from any source to the URI.
94  // Only sources that can be mapped into the URI's scheme are considered.
95  unsigned distance(llvm::StringRef URI);
96 
97 private:
98  // Returns the FileDistance for a URI scheme, creating it if needed.
99  FileDistance &forScheme(llvm::StringRef Scheme);
100 
101  // We cache the results using the original strings so we can skip URI parsing.
102  llvm::DenseMap<llvm::hash_code, unsigned> Cache;
103  llvm::StringMap<SourceParams> Sources;
104  llvm::StringMap<std::unique_ptr<FileDistance>> ByScheme;
105  FileDistanceOptions Opts;
106 };
107 
108 } // namespace clangd
109 } // namespace clang
static const char * Scheme
Definition: URI.cpp:33
URIDistance(llvm::StringMap< SourceParams > Sources, const FileDistanceOptions &Opts={})
Definition: FileDistance.h:89
std::string Path
A typedef to represent a file path.
Definition: Path.h:21
===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
A URI describes the location of a source file.
Definition: URI.h:29