Merge pull request #143 from iaom/0219-dev
Optimized Chinese phonetic alphabet index.
This commit is contained in:
commit
53e1cc3eb9
|
@ -50,14 +50,14 @@ void ConstructDocumentForPath::run()
|
||||||
//多音字版
|
//多音字版
|
||||||
//现加入首字母
|
//现加入首字母
|
||||||
QStringList pinyin_text_list = FileUtils::findMultiToneWords(QString(m_list.at(0)).replace(".",""));
|
QStringList pinyin_text_list = FileUtils::findMultiToneWords(QString(m_list.at(0)).replace(".",""));
|
||||||
if(!pinyin_text_list.isEmpty())
|
// if(!pinyin_text_list.isEmpty())
|
||||||
{
|
// {
|
||||||
for (QString& i : pinyin_text_list){
|
// for (QString& i : pinyin_text_list){
|
||||||
i.replace("", " ");
|
// i.replace("", " ");
|
||||||
i = i.simplified();
|
// i = i.simplified();
|
||||||
}
|
// }
|
||||||
doc.setIndexText(pinyin_text_list);
|
// doc.setIndexText(pinyin_text_list);
|
||||||
}
|
// }
|
||||||
|
|
||||||
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath));
|
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath));
|
||||||
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath.section("/",0,-2,QString::SectionIncludeLeadingSep)));
|
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath.section("/",0,-2,QString::SectionIncludeLeadingSep)));
|
||||||
|
@ -78,11 +78,22 @@ void ConstructDocumentForPath::run()
|
||||||
int postingCount = 0;
|
int postingCount = 0;
|
||||||
while(postingCount < index_text.size())
|
while(postingCount < index_text.size())
|
||||||
{
|
{
|
||||||
QVector<size_t> p;
|
// QVector<size_t> p;
|
||||||
p.append(postingCount);
|
// p.append(postingCount);
|
||||||
doc.addPosting(QUrl::toPercentEncoding(index_text.at(postingCount)).toStdString(),p);
|
doc.addPosting(QUrl::toPercentEncoding(index_text.at(postingCount)).toStdString(),postingCount);
|
||||||
++postingCount;
|
++postingCount;
|
||||||
}
|
}
|
||||||
|
int i = 0;
|
||||||
|
for (QString& s : pinyin_text_list)
|
||||||
|
{
|
||||||
|
i = 0;
|
||||||
|
while(i < s.size())
|
||||||
|
{
|
||||||
|
doc.addPosting(QUrl::toPercentEncoding(s.at(i)).toStdString(),postingCount);
|
||||||
|
++postingCount;
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// QMetaObject::invokeMethod(m_indexGenerator,"appendDocListPath",Q_ARG(Document,doc));
|
// QMetaObject::invokeMethod(m_indexGenerator,"appendDocListPath",Q_ARG(Document,doc));
|
||||||
_mutex_doc_list_path.lock();
|
_mutex_doc_list_path.lock();
|
||||||
|
|
|
@ -40,6 +40,16 @@ void Document::addPosting(std::string term,QVector<size_t> offset, int weight)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Adds a single posting for `term` to the wrapped m_document.
//
// term   - the term to post; an empty term is ignored and nothing is added.
// offset - forwarded unchanged as the second argument of add_posting.
// weight - forwarded unchanged as the third argument of add_posting.
//
// A term whose std::string length exceeds 240 is replaced by the first 30
// QString characters of that term before it is posted.
void Document::addPosting(std::string term, unsigned int offset, int weight)
|
||||||
|
{
|
||||||
|
// Nothing to index for an empty term.
if(term == "")
|
||||||
|
return;
|
||||||
|
// length() counts std::string elements (bytes), not characters.
// NOTE(review): presumably this guards a backend term-length limit -- confirm.
if(term.length() > 240)
|
||||||
|
// left(30) counts QString characters after converting from std::string.
// NOTE(review): the cut-off (30) is much smaller than the threshold (240), and
// callers visible in this diff pass percent-encoded text, so the cut may land
// inside a "%XX" escape -- confirm this is intended.
term = QString::fromStdString(term).left(30).toStdString();
|
||||||
|
|
||||||
|
m_document.add_posting(term,offset,weight);
|
||||||
|
}
|
||||||
|
|
||||||
void Document::addTerm(QString term)
|
void Document::addTerm(QString term)
|
||||||
{
|
{
|
||||||
if(term.isEmpty())
|
if(term.isEmpty())
|
||||||
|
|
|
@ -42,6 +42,7 @@ public:
|
||||||
}
|
}
|
||||||
void setData(QString data);
|
void setData(QString data);
|
||||||
void addPosting(std::string term, QVector<size_t> offset, int weight =1);
|
void addPosting(std::string term, QVector<size_t> offset, int weight =1);
|
||||||
|
void addPosting(std::string term, unsigned int offset, int weight =1);
|
||||||
void addTerm(QString term);
|
void addTerm(QString term);
|
||||||
void addValue(QString value);
|
void addValue(QString value);
|
||||||
void setUniqueTerm(QString term);
|
void setUniqueTerm(QString term);
|
||||||
|
|
|
@ -246,11 +246,11 @@ Xapian::Query FileSearcher::creatQueryForFileSearch(QString keyword, Xapian::Dat
|
||||||
// qDebug()<<QString::fromStdString(Xapian::Query(QString(userInput.at(i)).toStdString()).get_description());
|
// qDebug()<<QString::fromStdString(Xapian::Query(QString(userInput.at(i)).toStdString()).get_description());
|
||||||
}
|
}
|
||||||
Xapian::Query queryPhrase =Xapian::Query(Xapian::Query::OP_PHRASE, v.begin(), v.end());
|
Xapian::Query queryPhrase =Xapian::Query(Xapian::Query::OP_PHRASE, v.begin(), v.end());
|
||||||
Xapian::Query queryNear =Xapian::Query(Xapian::Query::OP_NEAR, v.begin(), v.end());
|
// Xapian::Query queryNear =Xapian::Query(Xapian::Query::OP_NEAR, v.begin(), v.end());
|
||||||
|
|
||||||
Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND,queryNear,queryPhrase);
|
// Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND,queryNear,queryPhrase);
|
||||||
// qDebug()<<QString::fromStdString(query.get_description());
|
// qDebug()<<QString::fromStdString(query.get_description());
|
||||||
return query;
|
return queryPhrase;
|
||||||
}
|
}
|
||||||
|
|
||||||
Xapian::Query FileSearcher::creatQueryForContentSearch(QString keyword, Xapian::Database &db)
|
Xapian::Query FileSearcher::creatQueryForContentSearch(QString keyword, Xapian::Database &db)
|
||||||
|
|
|
@ -194,17 +194,17 @@ void IndexGenerator::insertIntoDatabase(Document& doc)
|
||||||
{
|
{
|
||||||
// qDebug()<< "--index start--";
|
// qDebug()<< "--index start--";
|
||||||
Xapian::Document document = doc.getXapianDocument();
|
Xapian::Document document = doc.getXapianDocument();
|
||||||
m_indexer.set_document(document);
|
// m_indexer.set_document(document);
|
||||||
// qDebug()<<doc.getIndexText();
|
// qDebug()<<doc.getIndexText();
|
||||||
|
|
||||||
for(auto i : doc.getIndexText()){
|
// for(auto i : doc.getIndexText()){
|
||||||
m_indexer.index_text(i.toStdString());
|
// m_indexer.index_text(i.toStdString());
|
||||||
}
|
// }
|
||||||
qDebug()<<"insert m_database_path:"<<QString::fromStdString(document.get_data());
|
// qDebug()<<"insert m_database_path:"<<QString::fromStdString(document.get_data());
|
||||||
qDebug()<<"document:"<<QString::fromStdString(document.get_description());
|
// qDebug()<<"document:"<<QString::fromStdString(document.get_description());
|
||||||
|
|
||||||
Xapian::docid innerId= m_database_path->replace_document(doc.getUniqueTerm(),document);
|
Xapian::docid innerId= m_database_path->replace_document(doc.getUniqueTerm(),document);
|
||||||
qDebug()<<"replace doc docid="<<static_cast<int>(innerId);
|
// qDebug()<<"replace doc docid="<<static_cast<int>(innerId);
|
||||||
// qDebug()<< "--index finish--";
|
// qDebug()<< "--index finish--";
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue