Merge pull request #143 from iaom/0219-dev

Optimized Chinese phonetic alphabet index.
This commit is contained in:
Mouse Zhang 2021-02-20 17:47:35 +08:00 committed by GitHub
commit 53e1cc3eb9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 43 additions and 21 deletions

View File

@ -50,14 +50,14 @@ void ConstructDocumentForPath::run()
// Polyphonic-character (multiple-reading) version
// First letters (initials) are now included as well
QStringList pinyin_text_list = FileUtils::findMultiToneWords(QString(m_list.at(0)).replace(".",""));
if(!pinyin_text_list.isEmpty())
{
for (QString& i : pinyin_text_list){
i.replace("", " ");
i = i.simplified();
}
doc.setIndexText(pinyin_text_list);
}
// if(!pinyin_text_list.isEmpty())
// {
// for (QString& i : pinyin_text_list){
// i.replace("", " ");
// i = i.simplified();
// }
// doc.setIndexText(pinyin_text_list);
// }
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath));
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath.section("/",0,-2,QString::SectionIncludeLeadingSep)));
@ -78,11 +78,22 @@ void ConstructDocumentForPath::run()
int postingCount = 0;
while(postingCount < index_text.size())
{
QVector<size_t> p;
p.append(postingCount);
doc.addPosting(QUrl::toPercentEncoding(index_text.at(postingCount)).toStdString(),p);
// QVector<size_t> p;
// p.append(postingCount);
doc.addPosting(QUrl::toPercentEncoding(index_text.at(postingCount)).toStdString(),postingCount);
++postingCount;
}
int i = 0;
for (QString& s : pinyin_text_list)
{
i = 0;
while(i < s.size())
{
doc.addPosting(QUrl::toPercentEncoding(s.at(i)).toStdString(),postingCount);
++postingCount;
++i;
}
}
// QMetaObject::invokeMethod(m_indexGenerator,"appendDocListPath",Q_ARG(Document,doc));
_mutex_doc_list_path.lock();

View File

@ -40,6 +40,16 @@ void Document::addPosting(std::string term,QVector<size_t> offset, int weight)
}
}
void Document::addPosting(std::string term, unsigned int offset, int weight)
{
if(term == "")
return;
if(term.length() > 240)
term = QString::fromStdString(term).left(30).toStdString();
m_document.add_posting(term,offset,weight);
}
void Document::addTerm(QString term)
{
if(term.isEmpty())

View File

@ -42,6 +42,7 @@ public:
}
void setData(QString data);
void addPosting(std::string term, QVector<size_t> offset, int weight =1);
void addPosting(std::string term, unsigned int offset, int weight =1);
void addTerm(QString term);
void addValue(QString value);
void setUniqueTerm(QString term);

View File

@ -246,11 +246,11 @@ Xapian::Query FileSearcher::creatQueryForFileSearch(QString keyword, Xapian::Dat
// qDebug()<<QString::fromStdString(Xapian::Query(QString(userInput.at(i)).toStdString()).get_description());
}
Xapian::Query queryPhrase =Xapian::Query(Xapian::Query::OP_PHRASE, v.begin(), v.end());
Xapian::Query queryNear =Xapian::Query(Xapian::Query::OP_NEAR, v.begin(), v.end());
// Xapian::Query queryNear =Xapian::Query(Xapian::Query::OP_NEAR, v.begin(), v.end());
Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND,queryNear,queryPhrase);
// Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND,queryNear,queryPhrase);
// qDebug()<<QString::fromStdString(query.get_description());
return query;
return queryPhrase;
}
Xapian::Query FileSearcher::creatQueryForContentSearch(QString keyword, Xapian::Database &db)

View File

@ -194,17 +194,17 @@ void IndexGenerator::insertIntoDatabase(Document& doc)
{
// qDebug()<< "--index start--";
Xapian::Document document = doc.getXapianDocument();
m_indexer.set_document(document);
// m_indexer.set_document(document);
// qDebug()<<doc.getIndexText();
for(auto i : doc.getIndexText()){
m_indexer.index_text(i.toStdString());
}
qDebug()<<"insert m_database_path:"<<QString::fromStdString(document.get_data());
qDebug()<<"document:"<<QString::fromStdString(document.get_description());
// for(auto i : doc.getIndexText()){
// m_indexer.index_text(i.toStdString());
// }
// qDebug()<<"insert m_database_path:"<<QString::fromStdString(document.get_data());
// qDebug()<<"document:"<<QString::fromStdString(document.get_description());
Xapian::docid innerId= m_database_path->replace_document(doc.getUniqueTerm(),document);
qDebug()<<"replace doc docid="<<static_cast<int>(innerId);
// qDebug()<<"replace doc docid="<<static_cast<int>(innerId);
// qDebug()<< "--index finish--";
return;
}