Merge pull request #143 from iaom/0219-dev

Optimized Chinese phonetic alphabet index.
This commit is contained in:
Mouse Zhang 2021-02-20 17:47:35 +08:00 committed by GitHub
commit 53e1cc3eb9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 43 additions and 21 deletions

View File

@ -50,14 +50,14 @@ void ConstructDocumentForPath::run()
//多音字版 //多音字版
//现加入首字母 //现加入首字母
QStringList pinyin_text_list = FileUtils::findMultiToneWords(QString(m_list.at(0)).replace(".","")); QStringList pinyin_text_list = FileUtils::findMultiToneWords(QString(m_list.at(0)).replace(".",""));
if(!pinyin_text_list.isEmpty()) // if(!pinyin_text_list.isEmpty())
{ // {
for (QString& i : pinyin_text_list){ // for (QString& i : pinyin_text_list){
i.replace("", " "); // i.replace("", " ");
i = i.simplified(); // i = i.simplified();
} // }
doc.setIndexText(pinyin_text_list); // doc.setIndexText(pinyin_text_list);
} // }
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath)); QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath));
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath.section("/",0,-2,QString::SectionIncludeLeadingSep))); QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath.section("/",0,-2,QString::SectionIncludeLeadingSep)));
@ -78,11 +78,22 @@ void ConstructDocumentForPath::run()
int postingCount = 0; int postingCount = 0;
while(postingCount < index_text.size()) while(postingCount < index_text.size())
{ {
QVector<size_t> p; // QVector<size_t> p;
p.append(postingCount); // p.append(postingCount);
doc.addPosting(QUrl::toPercentEncoding(index_text.at(postingCount)).toStdString(),p); doc.addPosting(QUrl::toPercentEncoding(index_text.at(postingCount)).toStdString(),postingCount);
++postingCount; ++postingCount;
} }
int i = 0;
for (QString& s : pinyin_text_list)
{
i = 0;
while(i < s.size())
{
doc.addPosting(QUrl::toPercentEncoding(s.at(i)).toStdString(),postingCount);
++postingCount;
++i;
}
}
// QMetaObject::invokeMethod(m_indexGenerator,"appendDocListPath",Q_ARG(Document,doc)); // QMetaObject::invokeMethod(m_indexGenerator,"appendDocListPath",Q_ARG(Document,doc));
_mutex_doc_list_path.lock(); _mutex_doc_list_path.lock();

View File

@ -40,6 +40,16 @@ void Document::addPosting(std::string term,QVector<size_t> offset, int weight)
} }
} }
void Document::addPosting(std::string term, unsigned int offset, int weight)
{
if(term == "")
return;
if(term.length() > 240)
term = QString::fromStdString(term).left(30).toStdString();
m_document.add_posting(term,offset,weight);
}
void Document::addTerm(QString term) void Document::addTerm(QString term)
{ {
if(term.isEmpty()) if(term.isEmpty())

View File

@ -42,6 +42,7 @@ public:
} }
void setData(QString data); void setData(QString data);
void addPosting(std::string term, QVector<size_t> offset, int weight =1); void addPosting(std::string term, QVector<size_t> offset, int weight =1);
void addPosting(std::string term, unsigned int offset, int weight =1);
void addTerm(QString term); void addTerm(QString term);
void addValue(QString value); void addValue(QString value);
void setUniqueTerm(QString term); void setUniqueTerm(QString term);

View File

@ -246,11 +246,11 @@ Xapian::Query FileSearcher::creatQueryForFileSearch(QString keyword, Xapian::Dat
// qDebug()<<QString::fromStdString(Xapian::Query(QString(userInput.at(i)).toStdString()).get_description()); // qDebug()<<QString::fromStdString(Xapian::Query(QString(userInput.at(i)).toStdString()).get_description());
} }
Xapian::Query queryPhrase =Xapian::Query(Xapian::Query::OP_PHRASE, v.begin(), v.end()); Xapian::Query queryPhrase =Xapian::Query(Xapian::Query::OP_PHRASE, v.begin(), v.end());
Xapian::Query queryNear =Xapian::Query(Xapian::Query::OP_NEAR, v.begin(), v.end()); // Xapian::Query queryNear =Xapian::Query(Xapian::Query::OP_NEAR, v.begin(), v.end());
Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND,queryNear,queryPhrase); // Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND,queryNear,queryPhrase);
// qDebug()<<QString::fromStdString(query.get_description()); // qDebug()<<QString::fromStdString(query.get_description());
return query; return queryPhrase;
} }
Xapian::Query FileSearcher::creatQueryForContentSearch(QString keyword, Xapian::Database &db) Xapian::Query FileSearcher::creatQueryForContentSearch(QString keyword, Xapian::Database &db)

View File

@ -194,17 +194,17 @@ void IndexGenerator::insertIntoDatabase(Document& doc)
{ {
// qDebug()<< "--index start--"; // qDebug()<< "--index start--";
Xapian::Document document = doc.getXapianDocument(); Xapian::Document document = doc.getXapianDocument();
m_indexer.set_document(document); // m_indexer.set_document(document);
// qDebug()<<doc.getIndexText(); // qDebug()<<doc.getIndexText();
for(auto i : doc.getIndexText()){ // for(auto i : doc.getIndexText()){
m_indexer.index_text(i.toStdString()); // m_indexer.index_text(i.toStdString());
} // }
qDebug()<<"insert m_database_path:"<<QString::fromStdString(document.get_data()); // qDebug()<<"insert m_database_path:"<<QString::fromStdString(document.get_data());
qDebug()<<"document:"<<QString::fromStdString(document.get_description()); // qDebug()<<"document:"<<QString::fromStdString(document.get_description());
Xapian::docid innerId= m_database_path->replace_document(doc.getUniqueTerm(),document); Xapian::docid innerId= m_database_path->replace_document(doc.getUniqueTerm(),document);
qDebug()<<"replace doc docid="<<static_cast<int>(innerId); // qDebug()<<"replace doc docid="<<static_cast<int>(innerId);
// qDebug()<< "--index finish--"; // qDebug()<< "--index finish--";
return; return;
} }