中文分词接口统一处理;
This commit is contained in:
parent
308b9b642f
commit
0725b5b097
|
@ -1 +1 @@
|
||||||
Subproject commit 124c661581e0f2e10a47d3cc237f7ade404f1bec
|
Subproject commit bf085a4a188010fac212e0f80e133c777857552b
|
|
@ -41,9 +41,8 @@ bool fileContentIndexer::index()
|
||||||
}
|
}
|
||||||
|
|
||||||
m_document.setData(content);
|
m_document.setData(content);
|
||||||
//"\xEF\xBC\x8C" is "，" and "\xE3\x80\x82" is "。"; use three " " to replace each of them, to preserve the offset info.
|
|
||||||
content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " ");
|
std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content.left(20480000));
|
||||||
std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content.left(20480000).toStdString());
|
|
||||||
content.clear();
|
content.clear();
|
||||||
content.squeeze();
|
content.squeeze();
|
||||||
|
|
||||||
|
|
|
@ -274,7 +274,7 @@ int FileContentSearch::keywordSearchContent() {
|
||||||
qp.set_default_op(Xapian::Query::OP_AND);
|
qp.set_default_op(Xapian::Query::OP_AND);
|
||||||
qp.set_database(db);
|
qp.set_database(db);
|
||||||
|
|
||||||
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword.toStdString());
|
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword);
|
||||||
//Create a query
|
//Create a query
|
||||||
std::string words;
|
std::string words;
|
||||||
for(size_t i = 0; i < sKeyWord.size(); i++) {
|
for(size_t i = 0; i < sKeyWord.size(); i++) {
|
||||||
|
@ -414,7 +414,7 @@ int OcrSearch::keywordSearchOcr() {
|
||||||
Xapian::QueryParser qp;
|
Xapian::QueryParser qp;
|
||||||
qp.set_default_op(Xapian::Query::OP_AND);
|
qp.set_default_op(Xapian::Query::OP_AND);
|
||||||
qp.set_database(db);
|
qp.set_database(db);
|
||||||
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword.toStdString());
|
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword);
|
||||||
//Create a query
|
//Create a query
|
||||||
std::string words;
|
std::string words;
|
||||||
for(int i = 0; i < sKeyWord.size(); i++) {
|
for(int i = 0; i < sKeyWord.size(); i++) {
|
||||||
|
|
|
@ -180,7 +180,7 @@ NoteSearch::NoteSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, c
|
||||||
}
|
}
|
||||||
|
|
||||||
void NoteSearch::run() {
|
void NoteSearch::run() {
|
||||||
std::vector<KeyWord> sKeyWordVec = ChineseSegmentation::getInstance()->callSegment(m_keyword.toStdString());
|
std::vector<KeyWord> sKeyWordVec = ChineseSegmentation::getInstance()->callSegment(m_keyword);
|
||||||
QStringList keywordList;
|
QStringList keywordList;
|
||||||
for (KeyWord sKeyWord : sKeyWordVec) {
|
for (KeyWord sKeyWord : sKeyWordVec) {
|
||||||
keywordList.append(QString::fromStdString(sKeyWord.word));
|
keywordList.append(QString::fromStdString(sKeyWord.word));
|
||||||
|
|
|
@ -179,8 +179,8 @@ inline Xapian::Query FileContentSearchWorker::createQuery()
|
||||||
{
|
{
|
||||||
std::vector<Xapian::Query> v;
|
std::vector<Xapian::Query> v;
|
||||||
|
|
||||||
for (const auto &keyword : m_searchController->getKeyword()) {
|
for (auto &keyword : m_searchController->getKeyword()) {
|
||||||
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(keyword.toStdString());
|
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(keyword);
|
||||||
|
|
||||||
for(const auto & c : sKeyWord) {
|
for(const auto & c : sKeyWord) {
|
||||||
v.emplace_back(c.word);
|
v.emplace_back(c.word);
|
||||||
|
|
Loading…
Reference in New Issue