59 #include "llvm/ADT/Optional.h"
60 #include "llvm/Support/Format.h"
65 constexpr
int FuzzyMatcher::MaxPat;
66 constexpr
int FuzzyMatcher::MaxWord;
68 static char lower(
char C) {
return C >=
'A' && C <=
'Z' ? C + (
'a' -
'A') : C; }
77 : PatN(std::min<int>(MaxPat, Pattern.size())),
78 ScoreScale(PatN ? float{1} / (
PerfectBonus * PatN) : 0), WordN(0) {
79 std::copy(Pattern.begin(), Pattern.begin() + PatN, Pat);
80 for (
int I = 0; I < PatN; ++I)
81 LowPat[I] =
lower(Pat[I]);
82 Scores[0][0][Miss] = {0, Miss};
84 for (
int P = 0; P <= PatN; ++P)
85 for (
int W = 0; W < P; ++W)
86 for (
Action A : {Miss, Match})
89 llvm::makeMutableArrayRef(PatRole, PatN));
93 if (!(WordContainsPattern = init(
Word)))
98 auto Best = std::max(Scores[PatN][WordN][Miss].
Score,
99 Scores[PatN][WordN][Match].
Score);
103 ScoreScale * std::min(
PerfectBonus * PatN, std::max<int>(0, Best));
115 0x00, 0x00, 0x00, 0x00,
116 0x00, 0x00, 0x00, 0x00,
117 0xff, 0xff, 0xff, 0xff,
118 0x55, 0x55, 0xf5, 0xff,
119 0xab, 0xaa, 0xaa, 0xaa,
120 0xaa, 0xaa, 0xea, 0xff,
121 0x57, 0x55, 0x55, 0x55,
122 0x55, 0x55, 0xd5, 0x3f,
123 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
124 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
125 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
126 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
144 0x00, 0xaa, 0xaa, 0xff,
145 0x00, 0x55, 0xaa, 0xff,
146 0x00, 0x55, 0x59, 0xff,
147 0x00, 0xaa, 0xaa, 0xff,
151 template <
typename T>
static T
packedLookup(
const uint8_t *Data,
int I) {
152 return static_cast<T>((Data[I >> 2] >> ((I & 3) * 2)) & 3);
155 llvm::MutableArrayRef<CharRole> Roles) {
156 assert(
Text.size() == Roles.size());
157 if (
Text.size() == 0)
165 auto Rotate = [&](
CharType T) { Types = ((Types << 2) | T) & 0x3f; };
166 for (
unsigned I = 0; I <
Text.size() - 1; ++I) {
169 TypeSet |= 1 <<
Type;
171 Roles[I] = packedLookup<CharRole>(
CharRoles, Types);
175 Roles[
Text.size() - 1] = packedLookup<CharRole>(
CharRoles, Types);
181 bool FuzzyMatcher::init(llvm::StringRef NewWord) {
182 WordN = std::min<int>(MaxWord, NewWord.size());
185 std::copy(NewWord.begin(), NewWord.begin() + WordN, Word);
188 for (
int I = 0; I < WordN; ++I)
189 LowWord[I] =
lower(Word[I]);
192 for (
int W = 0, P = 0; P != PatN; ++W) {
195 if (LowWord[W] == LowPat[P])
203 llvm::makeMutableArrayRef(WordRole, WordN));
216 void FuzzyMatcher::buildGraph() {
217 for (
int W = 0; W < WordN; ++W) {
218 Scores[0][W + 1][Miss] = {Scores[0][W][Miss].Score - skipPenalty(W, Miss),
222 for (
int P = 0; P < PatN; ++P) {
223 for (
int W = P; W < WordN; ++W) {
224 auto &
Score = Scores[P + 1][W + 1], &PreMiss = Scores[P + 1][W];
226 auto MatchMissScore = PreMiss[Match].Score;
227 auto MissMissScore = PreMiss[Miss].Score;
229 MatchMissScore -= skipPenalty(W, Match);
230 MissMissScore -= skipPenalty(W, Miss);
232 Score[Miss] = (MatchMissScore > MissMissScore)
233 ? ScoreInfo{MatchMissScore, Match}
234 : ScoreInfo{MissMissScore, Miss};
236 auto &PreMatch = Scores[P][W];
237 auto MatchMatchScore =
238 allowMatch(P, W, Match)
239 ? PreMatch[Match].Score + matchBonus(P, W, Match)
241 auto MissMatchScore = allowMatch(P, W, Miss)
242 ? PreMatch[Miss].Score + matchBonus(P, W, Miss)
244 Score[Match] = (MatchMatchScore > MissMatchScore)
245 ? ScoreInfo{MatchMatchScore, Match}
246 : ScoreInfo{MissMatchScore, Miss};
251 bool FuzzyMatcher::allowMatch(
int P,
int W,
Action Last)
const {
252 if (LowPat[P] != LowWord[W])
262 if (WordRole[W] ==
Tail &&
263 (Word[W] == LowWord[W] || !(WordTypeSet & 1 <<
Lower)))
269 int FuzzyMatcher::skipPenalty(
int W,
Action Last)
const {
272 if (WordRole[W] ==
Head)
280 int FuzzyMatcher::matchBonus(
int P,
int W,
Action Last)
const {
281 assert(LowPat[P] == LowWord[W]);
283 bool IsPatSingleCase =
284 (PatTypeSet == 1 <<
Lower) || (PatTypeSet == 1 <<
Upper);
288 if (Pat[P] == Word[W] ||
289 (WordRole[W] ==
Head && (IsPatSingleCase || PatRole[P] ==
Head)))
293 if (W == 0 || Last == Match)
296 if (WordRole[W] ==
Tail && P && Last == Miss)
299 if (PatRole[P] ==
Head && WordRole[W] ==
Tail)
302 if (P == 0 && WordRole[W] ==
Tail)
309 llvm::SmallString<256> Result;
310 OS <<
"=== Match \"" << llvm::StringRef(Word, WordN) <<
"\" against ["
311 << llvm::StringRef(Pat, PatN) <<
"] ===\n";
313 OS <<
"Pattern is empty: perfect match.\n";
314 return Result = llvm::StringRef(Word, WordN);
317 OS <<
"Word is empty: no match.\n";
320 if (!WordContainsPattern) {
321 OS <<
"Substring check failed.\n";
323 }
else if (
isAwful(std::max(Scores[PatN][WordN][Match].
Score,
324 Scores[PatN][WordN][Miss].
Score))) {
325 OS <<
"Substring check passed, but all matches are forbidden\n";
327 if (!(PatTypeSet & 1 <<
Upper))
328 OS <<
"Lowercase query, so scoring ignores case\n";
334 (Scores[PatN][WordN][Match].Score > Scores[PatN][WordN][Miss].Score)
339 for (
int W = WordN - 1, P = PatN - 1; W >= 0; --W) {
341 const auto &Cell = Scores[P + 1][W + 1][Last];
344 const auto &Prev = Scores[P + 1][W][Cell.Prev];
345 S[W] = Cell.Score - Prev.Score;
348 for (
int I = 0; I < WordN; ++I) {
349 if (A[I] == Match && (I == 0 || A[I - 1] == Miss))
350 Result.push_back(
'[');
351 if (A[I] == Miss && I > 0 && A[I - 1] == Match)
352 Result.push_back(
']');
353 Result.push_back(Word[I]);
355 if (A[WordN - 1] == Match)
356 Result.push_back(
']');
358 for (
char C : llvm::StringRef(Word, WordN))
359 OS <<
" " << C <<
" ";
361 for (
int I = 0, J = 0; I < WordN; I++)
362 OS <<
" " << (A[I] == Match ? Pat[J++] :
' ') <<
" ";
364 for (
int I = 0; I < WordN; I++)
365 OS << llvm::format(
"%2d ", S[I]);
368 OS <<
"\nSegmentation:";
369 OS <<
"\n'" << llvm::StringRef(Word, WordN) <<
"'\n ";
370 for (
int I = 0; I < WordN; ++I)
371 OS <<
"?-+ "[static_cast<int>(WordRole[I])];
372 OS <<
"\n[" << llvm::StringRef(Pat, PatN) <<
"]\n ";
373 for (
int I = 0; I < PatN; ++I)
374 OS <<
"?-+ "[static_cast<int>(PatRole[I])];
377 OS <<
"\nScoring table (last-Miss, last-Match):\n";
379 for (
char C : llvm::StringRef(Word, WordN))
380 OS <<
" " << C <<
" ";
382 OS <<
"-+----" << std::string(WordN * 4,
'-') <<
"\n";
383 for (
int I = 0; I <= PatN; ++I) {
384 for (Action A : {Miss, Match}) {
385 OS << ((I && A == Miss) ? Pat[I - 1] :
' ') <<
"|";
386 for (
int J = 0; J <= WordN; ++J) {
388 OS << llvm::format(
"%3d%c", Scores[I][J][A].
Score,
389 Scores[I][J][A].Prev == Match ?
'*' :
' ');