143 lines
3.8 KiB
C++
143 lines
3.8 KiB
C++
//===--- FuzzySymbolIndex.cpp - Lookup symbols for autocomplete -*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
#include "FuzzySymbolIndex.h"
|
|
#include "llvm/Support/Regex.h"
|
|
|
|
using clang::find_all_symbols::SymbolAndSignals;
|
|
using llvm::StringRef;
|
|
|
|
namespace clang {
|
|
namespace include_fixer {
|
|
namespace {
|
|
|
|
class MemSymbolIndex : public FuzzySymbolIndex {
|
|
public:
|
|
MemSymbolIndex(std::vector<SymbolAndSignals> Symbols) {
|
|
for (auto &Symbol : Symbols) {
|
|
auto Tokens = tokenize(Symbol.Symbol.getName());
|
|
this->Symbols.emplace_back(
|
|
StringRef(llvm::join(Tokens.begin(), Tokens.end(), " ")),
|
|
std::move(Symbol));
|
|
}
|
|
}
|
|
|
|
std::vector<SymbolAndSignals> search(StringRef Query) override {
|
|
auto Tokens = tokenize(Query);
|
|
llvm::Regex Pattern("^" + queryRegexp(Tokens));
|
|
std::vector<SymbolAndSignals> Results;
|
|
for (const Entry &E : Symbols)
|
|
if (Pattern.match(E.first))
|
|
Results.push_back(E.second);
|
|
return Results;
|
|
}
|
|
|
|
private:
|
|
using Entry = std::pair<llvm::SmallString<32>, SymbolAndSignals>;
|
|
std::vector<Entry> Symbols;
|
|
};
|
|
|
|
// Helpers for tokenize state machine.
|
|
enum TokenizeState {
|
|
EMPTY, // No pending characters.
|
|
ONE_BIG, // Read one uppercase letter, could be WORD or Word.
|
|
BIG_WORD, // Reading an uppercase WORD.
|
|
SMALL_WORD, // Reading a lowercase word.
|
|
NUMBER // Reading a number.
|
|
};
|
|
|
|
enum CharType { UPPER, LOWER, DIGIT, MISC };
|
|
CharType classify(char c) {
|
|
if (isupper(c))
|
|
return UPPER;
|
|
if (islower(c))
|
|
return LOWER;
|
|
if (isdigit(c))
|
|
return DIGIT;
|
|
return MISC;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
std::vector<std::string> FuzzySymbolIndex::tokenize(StringRef Text) {
|
|
std::vector<std::string> Result;
|
|
// State describes the treatment of text from Start to I.
|
|
// Once text is Flush()ed into Result, we're done with it and advance Start.
|
|
TokenizeState State = EMPTY;
|
|
size_t Start = 0;
|
|
auto Flush = [&](size_t End) {
|
|
if (State != EMPTY) {
|
|
Result.push_back(Text.substr(Start, End - Start).lower());
|
|
State = EMPTY;
|
|
}
|
|
Start = End;
|
|
};
|
|
for (size_t I = 0; I < Text.size(); ++I) {
|
|
CharType Type = classify(Text[I]);
|
|
if (Type == MISC)
|
|
Flush(I);
|
|
else if (Type == LOWER)
|
|
switch (State) {
|
|
case BIG_WORD:
|
|
Flush(I - 1); // FOOBar: first token is FOO, not FOOB.
|
|
LLVM_FALLTHROUGH;
|
|
case ONE_BIG:
|
|
State = SMALL_WORD;
|
|
LLVM_FALLTHROUGH;
|
|
case SMALL_WORD:
|
|
break;
|
|
default:
|
|
Flush(I);
|
|
State = SMALL_WORD;
|
|
}
|
|
else if (Type == UPPER)
|
|
switch (State) {
|
|
case ONE_BIG:
|
|
State = BIG_WORD;
|
|
LLVM_FALLTHROUGH;
|
|
case BIG_WORD:
|
|
break;
|
|
default:
|
|
Flush(I);
|
|
State = ONE_BIG;
|
|
}
|
|
else if (Type == DIGIT && State != NUMBER) {
|
|
Flush(I);
|
|
State = NUMBER;
|
|
}
|
|
}
|
|
Flush(Text.size());
|
|
return Result;
|
|
}
|
|
|
|
std::string
|
|
FuzzySymbolIndex::queryRegexp(const std::vector<std::string> &Tokens) {
|
|
std::string Result;
|
|
for (size_t I = 0; I < Tokens.size(); ++I) {
|
|
if (I)
|
|
Result.append("[[:alnum:]]* ");
|
|
for (size_t J = 0; J < Tokens[I].size(); ++J) {
|
|
if (J)
|
|
Result.append("([[:alnum:]]* )?");
|
|
Result.push_back(Tokens[I][J]);
|
|
}
|
|
}
|
|
return Result;
|
|
}
|
|
|
|
llvm::Expected<std::unique_ptr<FuzzySymbolIndex>>
|
|
FuzzySymbolIndex::createFromYAML(StringRef FilePath) {
|
|
auto Buffer = llvm::MemoryBuffer::getFile(FilePath);
|
|
if (!Buffer)
|
|
return llvm::errorCodeToError(Buffer.getError());
|
|
return llvm::make_unique<MemSymbolIndex>(
|
|
find_all_symbols::ReadSymbolInfosFromYAML(Buffer.get()->getBuffer()));
|
|
}
|
|
|
|
} // namespace include_fixer
|
|
} // namespace clang
|