/*
 * Copyright (C) 2020, KylinSoft Co., Ltd.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * Authors: zhangpengfei
 *
 */
// NOTE(review): the reviewed copy of this file had every "<...>" span stripped
// (template arguments, the license URL). The template arguments below were
// re-derived from how each object is used; confirm them against
// search-manager.h before merging.
#include "search-manager.h"
#include "dir-watcher.h"

#include <QMutexLocker>

using namespace UkuiSearch;

namespace {

/**
 * @brief First recorded position of the term an iterator points at.
 * @param term Iterator into a document's term list.
 * @param end  The end iterator of that same term list.
 * @return The first position of the term, or 0 when @p term is at @p end or
 *         the term has no position data.
 *
 * Dereferencing an end TermIterator/PositionIterator is undefined, so both
 * cases are checked before reading the position.
 */
Xapian::termpos firstPositionOrZero(const Xapian::TermIterator &term,
                                    const Xapian::TermIterator &end)
{
    if (term == end) {
        return 0;
    }
    const Xapian::PositionIterator pos = term.positionlist_begin();
    if (pos == term.positionlist_end()) {
        return 0;
    }
    return *pos;
}

} // namespace

// Per-category "generation" counters. A running search task compares its own
// m_uniqueSymbol against the matching counter and stops when they differ.
size_t SearchManager::uniqueSymbolFile = 0;
size_t SearchManager::uniqueSymbolDir = 0;
size_t SearchManager::uniqueSymbolContent = 0;
size_t SearchManager::uniqueSymbolOcr = 0;

// Mutexes held while a task checks its counter and touches its result queue.
QMutex SearchManager::m_mutexFile;
QMutex SearchManager::m_mutexDir;
QMutex SearchManager::m_mutexContent;
QMutex SearchManager::m_mutexOcr;

SearchManager::SearchManager(QObject *parent) : QObject(parent)
{
}

SearchManager::~SearchManager()
{
}

/**
 * @brief Number of documents in the database at INDEX_PATH.
 * @return The document count, or 0 if Xapian reports an error.
 */
int SearchManager::getCurrentIndexCount()
{
    try {
        Xapian::Database db(INDEX_PATH);
        return db.get_doccount();
    } catch (const Xapian::Error &e) {
        qWarning() << QString::fromStdString(e.get_description());
        return 0;
    }
}

/**
 * @brief Tests whether @p path is, or lies under, one of the user's block dirs.
 * @param path Path to test.
 * @return true if FileUtils::isOrUnder() matches any entry returned by
 *         DirWatcher::getBlockDirsOfUser().
 */
bool SearchManager::isBlocked(QString &path)
{
    const QStringList blockList = DirWatcher::getDirWatcher()->getBlockDirsOfUser();
    for (const QString &blockDir : blockList) {
        if (FileUtils::isOrUnder(path, blockDir)) {
            return true;
        }
    }
    return false;
}

/**
 * @brief Fills @p ri with icon, name, description and action key for @p path.
 * @param ri   Result record to populate.
 * @param path Local file path.
 * @return false (leaving @p ri untouched) if the path does not exist,
 *         true otherwise.
 */
bool SearchManager::creatResultInfo(SearchPluginIface::ResultInfo &ri, QString path)
{
    const QFileInfo info(path);
    if (!info.exists()) {
        return false;
    }

    ri.icon = FileUtils::getFileIcon(QUrl::fromLocalFile(path).toString(), false);
    ri.name = info.fileName();
    ri.description = QVector<SearchPluginIface::DescriptionInfo>()
                     << SearchPluginIface::DescriptionInfo{tr("Path:"), path}
                     << SearchPluginIface::DescriptionInfo{tr("Modified time:"), info.lastModified().toString("yyyy/MM/dd hh:mm:ss")};
    ri.actionKey = path;

    if (true == targetPhotographTypeMap[info.suffix()]) {
        ri.type = 1; // 1 marks an OCR image file
    } else {
        ri.type = 0; // 0 is the default (text file)
    }
    return true;
}

/**
 * @brief Creates an auto-deleting file/dir name search task.
 * @param searchResult Queue that receives the results.
 * @param uniqueSymbol Generation id of this search.
 * @param keyword      User input.
 * @param value        Slot value used to filter documents ("0" and "1" are
 *                     the values run() and getResult() distinguish).
 * @param slot         Value slot that @p value is matched against.
 * @param begin        Stored, but run() restarts paging from 0.
 * @param num          Stored, but run() forces a page size of 100.
 */
FileSearch::FileSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, size_t uniqueSymbol, QString keyword, QString value, unsigned slot, int begin, int num)
{
    this->setAutoDelete(true);
    m_search_result = searchResult;
    m_uniqueSymbol = uniqueSymbol;
    m_keyword = keyword;
    m_value = value;
    m_slot = slot;
    m_begin = begin;
    m_num = num;
    m_matchDecider = new FileMatchDecider();
}

FileSearch::~FileSearch()
{
    m_search_result = nullptr;
    delete m_matchDecider; // deleting nullptr is a no-op
    m_matchDecider = nullptr;
}

/**
 * @brief Clears the result queue, then pages through the index 100 hits at a
 *        time until a page is empty or a page reports failure/cancellation.
 */
void FileSearch::run()
{
    if (m_value == "0") {
        QMutexLocker locker(&SearchManager::m_mutexFile);
        if (!m_search_result->isEmpty()) {
            m_search_result->clear();
        }
    } else if (m_value == "1") {
        QMutexLocker locker(&SearchManager::m_mutexDir);
        if (!m_search_result->isEmpty()) {
            m_search_result->clear();
        }
    }

    // The current requirement is that file search has no upper limit on the
    // number of results. But without a per-query limit, on very weak machines
    // (e.g. some Zhaoxin machines), even though the UI is not blocked here,
    // the search itself may use so much CPU that the UI (possibly) stutters.
    // There may be a better approach; to be optimized.
    m_begin = 0;
    m_num = 100;
    int totalCount = 0;
    while (true) {
        const int resultCount = keywordSearchfile();
        if (resultCount <= 0) {
            break; // 0: no more hits; -1: error or superseded search
        }
        m_begin += m_num;
        totalCount += resultCount; // only real hits are counted
    }
    qDebug() << "Total count:" << m_value << totalCount;
}

/**
 * @brief Runs one page [m_begin, m_begin + m_num) of the file name query.
 * @return Number of hits in this page, 0 if there are none, -1 on a Xapian
 *         error or when getResult() reports -1.
 */
int FileSearch::keywordSearchfile()
{
    try {
        qDebug() << "--keywordSearchfile start--";
        Xapian::Database db(INDEX_PATH);
        Xapian::Query query = creatQueryForFileSearch();
        Xapian::Enquire enquire(db);

        Xapian::Query queryFile;
        if (!m_value.isEmpty()) {
            // Restrict hits to documents whose slot value equals m_value.
            const std::string slotValue = m_value.toStdString();
            Xapian::Query queryValue = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, m_slot, slotValue, slotValue);
            queryFile = Xapian::Query(Xapian::Query::OP_AND, query, queryValue);
        } else {
            queryFile = query;
        }

        qDebug() << "keywordSearchfile:" << QString::fromStdString(queryFile.get_description());

        enquire.set_query(queryFile);
        enquire.set_docid_order(Xapian::Enquire::DONT_CARE);
        enquire.set_sort_by_relevance_then_value(2, true);
        Xapian::MSet result = enquire.get_mset(m_begin, m_num, 0, m_matchDecider);

        const int resultCount = result.size();
        qDebug() << "keywordSearchfile results count=" << resultCount;
        if (resultCount == 0) {
            return 0;
        }
        if (getResult(result) == -1) {
            return -1;
        }

        qDebug() << "--keywordSearchfile finish--";
        return resultCount;
    } catch (const Xapian::Error &e) {
        qWarning() << QString::fromStdString(e.get_description());
        qDebug() << "--keywordSearchfile finish--";
        return -1;
    }
}

/**
 * @brief Builds the name query: one percent-encoded term per character of the
 *        lower-cased keyword.
 *
 * NOTE(review): the end of this function was missing from the reviewed text.
 * Combining the per-character terms with OP_PHRASE is a reconstruction;
 * confirm the operator against the upstream source.
 */
Xapian::Query FileSearch::creatQueryForFileSearch()
{
    const QString userInput = m_keyword.toLower();
    std::vector<Xapian::Query> v;
    for (int i = 0; i < userInput.size(); i++) {
        v.push_back(Xapian::Query(QUrl::toPercentEncoding(userInput.at(i)).toStdString()));
    }
    return Xapian::Query(Xapian::Query::OP_PHRASE, v.begin(), v.end());
}

/**
 * @brief Converts a page of hits into ResultInfo records and enqueues them.
 * @param result Hits returned by Xapian for the current page.
 * @return 0 on success, -1 when this search's generation id no longer matches
 *         the current one (the task has been superseded).
 *
 * NOTE(review): the beginning of this function (up to the first enqueue) was
 * missing from the reviewed text. Reading the path from doc.get_data() mirrors
 * FileMatchDecider; switching on m_value.toInt() is a reconstruction that must
 * be confirmed against the upstream source.
 */
int FileSearch::getResult(Xapian::MSet &result)
{
    for (auto it = result.begin(); it != result.end(); ++it) {
        Xapian::Document doc = it.get_document();
        const QString path = QString::fromStdString(doc.get_data());

        SearchPluginIface::ResultInfo ri;
        if (!SearchManager::creatResultInfo(ri, path)) {
            continue; // file vanished since it was indexed
        }

        switch (m_value.toInt()) {
        case 1: {
            QMutexLocker locker(&SearchManager::m_mutexDir);
            if (m_uniqueSymbol != SearchManager::uniqueSymbolDir) {
                return -1;
            }
            m_search_result->enqueue(ri);
            break;
        }
        case 0: {
            QMutexLocker locker(&SearchManager::m_mutexFile);
            if (m_uniqueSymbol != SearchManager::uniqueSymbolFile) {
                return -1;
            }
            m_search_result->enqueue(ri);
            break;
        }
        default:
            break;
        }
    }
    return 0;
}

/**
 * @brief Creates an auto-deleting file content search task.
 * @param searchResult Queue that receives the results.
 * @param uniqueSymbol Generation id of this search.
 * @param keyword      User input, segmented before querying.
 * @param fuzzy        true: keywords are OR-ed; false: AND-ed.
 * @param begin        Stored, but run() restarts paging from 0.
 * @param num          Stored, but run() forces a page size of 100.
 */
FileContentSearch::FileContentSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, size_t uniqueSymbol, QString keyword, bool fuzzy, int begin, int num)
    : m_search_result(searchResult),
      m_uniqueSymbol(uniqueSymbol),
      m_keyword(keyword),
      m_fuzzy(fuzzy),
      m_begin(begin),
      m_num(num)
{
    this->setAutoDelete(true);
    m_matchDecider = new FileContentMatchDecider();
}

FileContentSearch::~FileContentSearch()
{
    m_search_result = nullptr;
    delete m_matchDecider; // deleting nullptr is a no-op
    m_matchDecider = nullptr;
}

/**
 * @brief Clears the result queue, then pages through the content index 100
 *        hits at a time until a page is empty or reports failure.
 */
void FileContentSearch::run()
{
    {
        QMutexLocker locker(&SearchManager::m_mutexContent);
        if (!m_search_result->isEmpty()) {
            m_search_result->clear();
        }
    }

    // Same paging scheme as the file search; to be optimized.
    m_begin = 0;
    m_num = 100;
    int totalCount = 0;
    while (true) {
        const int resultCount = keywordSearchContent();
        if (resultCount <= 0) {
            break; // 0: no more hits; -1: error or superseded search
        }
        m_begin += m_num;
        totalCount += resultCount; // only real hits are counted
    }
    qDebug() << "Total count:" << totalCount;
}

/**
 * @brief Runs one page [m_begin, m_begin + m_num) of the content query.
 * @return Number of hits in this page, 0 if there are none, -1 on a Xapian
 *         error or when getResult() reports -1.
 */
int FileContentSearch::keywordSearchContent()
{
    try {
        qDebug() << "--keywordSearchContent search start--";
        Xapian::Database db(CONTENT_INDEX_PATH);
        Xapian::Enquire enquire(db);

        const auto sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword);

        // Build both the term queries and the space-separated keyword string
        // that getResult() uses to locate a snippet.
        std::string words;
        std::vector<Xapian::Query> v;
        for (const auto &segment : sKeyWord) {
            words.append(segment.word).append(" ");
            v.push_back(Xapian::Query(segment.word));
        }

        const Xapian::Query::op combiner = m_fuzzy ? Xapian::Query::OP_OR : Xapian::Query::OP_AND;
        Xapian::Query query = Xapian::Query(combiner, v.begin(), v.end());

        qDebug() << "keywordSearchContent:" << QString::fromStdString(query.get_description());

        enquire.set_query(query);
        Xapian::MSet result = enquire.get_mset(m_begin, m_num, 0, m_matchDecider);

        const int resultCount = result.size();
        if (resultCount == 0) {
            return 0;
        }
        qDebug() << "keywordSearchContent results count=" << resultCount;

        if (getResult(result, words) == -1) {
            return -1;
        }

        qDebug() << "--keywordSearchContent search finish--";
        return resultCount;
    } catch (const Xapian::Error &e) {
        qWarning() << QString::fromStdString(e.get_description());
        qDebug() << "--keywordSearchContent search finish--";
        return -1;
    }
}

/**
 * @brief Converts a page of content hits into ResultInfo records, each with a
 *        snippet prepended to its description, and enqueues them.
 * @param result  Hits returned by Xapian for the current page.
 * @param keyWord Space-separated keywords produced by keywordSearchContent().
 * @return 0 on success, -1 when this search has been superseded.
 */
int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord)
{
    // The keyword list is the same for every hit, so split it once.
    const QStringList words = QString::fromStdString(keyWord).split(" ", QString::SkipEmptyParts);

    for (auto it = result.begin(); it != result.end(); ++it) {
        Xapian::Document doc = it.get_document();
        const std::string data = doc.get_data();
        QString path = QString::fromStdString(doc.get_value(1));

        SearchPluginIface::ResultInfo ri;
        if (!SearchManager::creatResultInfo(ri, path)) {
            continue;
        }

        // Construct a snippet containing a keyword: stop at the first keyword
        // that is an exact term of this document. If none is, the iterator is
        // left at the first term of the document.
        Xapian::TermIterator termIterator = doc.termlist_begin();
        for (const QString &wordTobeFound : words) {
            const std::string term = wordTobeFound.toStdString();
            termIterator.skip_to(term);
            if (termIterator != doc.termlist_end() && term == *termIterator) {
                break;
            }
            termIterator = doc.termlist_begin();
        }

        // Guarded: an empty term list or a term without positions yields 0
        // instead of dereferencing an end iterator.
        const Xapian::termpos snippetStart = firstPositionOrZero(termIterator, doc.termlist_end());
        const QString snippet = FileUtils::chineseSubString(data, snippetStart, 120);
        ri.description.prepend(SearchPluginIface::DescriptionInfo{"", snippet});

        QMutexLocker locker(&SearchManager::m_mutexContent);
        if (m_uniqueSymbol != SearchManager::uniqueSymbolContent) {
            return -1;
        }
        m_search_result->enqueue(ri);
    }
    return 0;
}

/**
 * @brief Creates an auto-deleting OCR text search task.
 * @param searchResult Queue that receives the results.
 * @param uniqueSymbol Generation id of this search.
 * @param keyword      User input, segmented before querying.
 * @param begin        Stored, but run() restarts paging from 0.
 * @param num          Stored, but run() forces a page size of 100.
 */
OcrSearch::OcrSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, size_t uniqueSymbol, QString keyword, int begin, int num)
{
    this->setAutoDelete(true);
    m_search_result = searchResult;
    m_uniqueSymbol = uniqueSymbol;
    m_keyword = keyword;
    m_begin = begin;
    m_num = num;
    m_matchDecider = new OcrMatchDecider();
}

OcrSearch::~OcrSearch()
{
    m_search_result = nullptr;
    delete m_matchDecider; // deleting nullptr is a no-op
    m_matchDecider = nullptr;
}

/**
 * @brief Clears the result queue, then pages through the OCR index 100 hits
 *        at a time until a page is empty or reports failure.
 */
void OcrSearch::run()
{
    {
        QMutexLocker locker(&SearchManager::m_mutexOcr);
        if (!m_search_result->isEmpty()) {
            m_search_result->clear();
        }
    }

    // Same paging scheme as the file search; to be optimized.
    m_begin = 0;
    m_num = 100;
    int totalCount = 0;
    while (true) {
        const int resultCount = keywordSearchOcr();
        if (resultCount <= 0) {
            break; // 0: no more hits; -1: error or superseded search
        }
        m_begin += m_num;
        totalCount += resultCount; // only real hits are counted
    }
    qDebug() << "Total count:" << totalCount;
}

/**
 * @brief Runs one page [m_begin, m_begin + m_num) of the OCR query; all
 *        segmented keywords must match (OP_AND).
 * @return Number of hits in this page, 0 if there are none, -1 on a Xapian
 *         error or when getResult() reports -1.
 */
int OcrSearch::keywordSearchOcr()
{
    try {
        qDebug() << "--keywordSearch OCR search start--";
        Xapian::Database db(OCR_INDEX_PATH);
        Xapian::Enquire enquire(db);

        const auto sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword);

        // Space-separated keyword string, used by getResult() for the snippet.
        std::string words;
        for (const auto &segment : sKeyWord) {
            words.append(segment.word).append(" ");
        }

        std::vector<Xapian::Query> v;
        for (const auto &segment : sKeyWord) {
            v.push_back(Xapian::Query(segment.word));
            qDebug() << QString::fromStdString(segment.word);
        }
        Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end());

        qDebug() << "keywordSearch OCR:" << QString::fromStdString(query.get_description());

        enquire.set_query(query);
        Xapian::MSet result = enquire.get_mset(m_begin, m_num, 0, m_matchDecider);

        const int resultCount = result.size();
        if (resultCount == 0) {
            return 0;
        }
        qDebug() << "keywordSearch OCR results count=" << resultCount;

        if (getResult(result, words) == -1) {
            return -1;
        }

        qDebug() << "--keywordSearch OCR search finish--";
        return resultCount;
    } catch (const Xapian::Error &e) {
        qWarning() << QString::fromStdString(e.get_description());
        qDebug() << "--keywordSearch OCR search finish--";
        return -1;
    }
}

/**
 * @brief Converts a page of OCR hits into ResultInfo records, each with a
 *        snippet prepended to its description, and enqueues them.
 * @param result  Hits returned by Xapian for the current page.
 * @param keyWord Space-separated keywords; only the first one is used to
 *                position the snippet.
 * @return 0 on success, -1 when this search has been superseded.
 */
int OcrSearch::getResult(Xapian::MSet &result, std::string &keyWord)
{
    // Only the first keyword positions the snippet; it is the same for every hit.
    const std::string wordTobeFound = QString::fromStdString(keyWord).section(" ", 0, 0).toStdString();

    for (auto it = result.begin(); it != result.end(); ++it) {
        Xapian::Document doc = it.get_document();
        const std::string data = doc.get_data();
        QString path = QString::fromStdString(doc.get_value(1));

        SearchPluginIface::ResultInfo ri;
        if (!SearchManager::creatResultInfo(ri, path)) {
            continue;
        }

        // Construct a snippet containing the keyword.
        Xapian::TermIterator term = doc.termlist_begin();
        term.skip_to(wordTobeFound);

        // FIXME: make a snippet without cutting a CJK character.
        // Guarded: skip_to() may run off the end of the term list, and a term
        // may carry no positions; both cases fall back to position 0.
        const Xapian::termpos snippetStart = firstPositionOrZero(term, doc.termlist_end());
        const QString snippet = FileUtils::chineseSubString(data, snippetStart, 120);
        ri.description.prepend(SearchPluginIface::DescriptionInfo{"", snippet});

        QMutexLocker locker(&SearchManager::m_mutexOcr);
        if (m_uniqueSymbol != SearchManager::uniqueSymbolOcr) {
            return -1;
        }
        m_search_result->enqueue(ri);
    }
    return 0;
}

/**
 * @brief Creates an auto-deleting task that searches by walking the file
 *        system directly instead of querying an index.
 * @param keyword      Text that file names must contain (case-insensitive).
 * @param searchResult Queue that receives the results.
 * @param value        DIR_SEARCH_VALUE or FILE_SEARCH_VALUE.
 * @param uniqueSymbol Generation id of this search.
 */
DirectSearch::DirectSearch(QString keyword, DataQueue<SearchPluginIface::ResultInfo> *searchResult, QString value, size_t uniqueSymbol)
{
    this->setAutoDelete(true);
    m_keyword = keyword;
    m_searchResult = searchResult;
    m_uniqueSymbol = uniqueSymbol;
    m_value = value;
}

/**
 * @brief Breadth-first walk over the indexable directories, skipping blocked
 *        directories and not descending into symlinked ones, matching every
 *        entry against the keyword.
 *
 * NOTE(review): cancellation is checked against uniqueSymbolDir under
 * m_mutexDir regardless of m_value; confirm this is intended for file searches.
 */
void DirectSearch::run()
{
    QStringList blockList = DirWatcher::getDirWatcher()->getBlockDirsOfUser();
    const QStringList searchPath = DirWatcher::getDirWatcher()->currentIndexableDir();

    // Evaluated lazily, so it always sees the current contents of blockList.
    const auto isUnderBlockList = [&blockList](const QString &candidate) {
        for (const QString &blockDir : blockList) {
            if (FileUtils::isOrUnder(candidate, blockDir)) {
                return true;
            }
        }
        return false;
    };

    QQueue<QString> bfs;
    for (const QString &path : searchPath) {
        if (isUnderBlockList(path)) {
            continue;
        }
        blockList.append(DirWatcher::getDirWatcher()->blackListOfDir(path));
        bfs.enqueue(path);
        match(QFileInfo(path)); // the root itself may match
    }
    if (bfs.isEmpty()) {
        return;
    }

    QDir dir;
    // QDir::Hidden is intentionally not set.
    if (m_value == DIR_SEARCH_VALUE) {
        dir.setFilter(QDir::Dirs | QDir::NoDotAndDotDot);
    } else {
        dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
        dir.setSorting(QDir::DirsFirst);
    }

    while (!bfs.empty()) {
        dir.setPath(bfs.dequeue());
        const QFileInfoList entries = dir.entryInfoList();
        for (const QFileInfo &entry : entries) {
            if (entry.isDir() && !entry.isSymLink()) {
                if (isUnderBlockList(entry.absoluteFilePath())) {
                    qDebug() << "path is blocked:" << entry.absoluteFilePath();
                    continue;
                }
                bfs.enqueue(entry.absoluteFilePath());
            }

            QMutexLocker locker(&SearchManager::m_mutexDir);
            if (m_uniqueSymbol != SearchManager::uniqueSymbolDir) {
                return; // a newer search has started
            }
            match(entry);
        }
    }
}

/**
 * @brief Enqueues @p info when its file name contains the keyword and its kind
 *        (directory or regular file) matches the requested search value.
 * @param info Entry to test.
 */
void DirectSearch::match(const QFileInfo &info)
{
    if (!info.fileName().contains(m_keyword, Qt::CaseInsensitive)) {
        return;
    }

    const bool wantedDir = info.isDir() && m_value == DIR_SEARCH_VALUE;
    const bool wantedFile = info.isFile() && m_value == FILE_SEARCH_VALUE;
    if (!wantedDir && !wantedFile) {
        return;
    }

    SearchPluginIface::ResultInfo ri;
    if (SearchManager::creatResultInfo(ri, info.absoluteFilePath())) {
        m_searchResult->enqueue(ri);
    }
}

/**
 * @brief Accepts a document unless the path stored as its data is blocked.
 */
bool FileMatchDecider::operator ()(const Xapian::Document &doc) const
{
    QString path = QString::fromStdString(doc.get_data());
    return !SearchManager::isBlocked(path);
}

/**
 * @brief Accepts a document unless the path stored in value slot 1 is blocked.
 */
bool FileContentMatchDecider::operator ()(const Xapian::Document &doc) const
{
    QString path = QString::fromStdString(doc.get_value(1));
    return !SearchManager::isBlocked(path);
}

/**
 * @brief Accepts a document unless the path stored in value slot 1 is blocked.
 */
bool OcrMatchDecider::operator ()(const Xapian::Document &doc) const
{
    QString path = QString::fromStdString(doc.get_value(1));
    return !SearchManager::isBlocked(path);
}