Merge pull request #143 from iaom/0219-dev
Optimized Chinese phonetic alphabet index.
This commit is contained in:
commit
53e1cc3eb9
|
@ -50,14 +50,14 @@ void ConstructDocumentForPath::run()
|
|||
// Polyphonic-character version (a character may have several pinyin readings)
|
||||
// Initial letters are now included as well
|
||||
QStringList pinyin_text_list = FileUtils::findMultiToneWords(QString(m_list.at(0)).replace(".",""));
|
||||
if(!pinyin_text_list.isEmpty())
|
||||
{
|
||||
for (QString& i : pinyin_text_list){
|
||||
i.replace("", " ");
|
||||
i = i.simplified();
|
||||
}
|
||||
doc.setIndexText(pinyin_text_list);
|
||||
}
|
||||
// if(!pinyin_text_list.isEmpty())
|
||||
// {
|
||||
// for (QString& i : pinyin_text_list){
|
||||
// i.replace("", " ");
|
||||
// i = i.simplified();
|
||||
// }
|
||||
// doc.setIndexText(pinyin_text_list);
|
||||
// }
|
||||
|
||||
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath));
|
||||
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath.section("/",0,-2,QString::SectionIncludeLeadingSep)));
|
||||
|
@ -78,11 +78,22 @@ void ConstructDocumentForPath::run()
|
|||
int postingCount = 0;
|
||||
while(postingCount < index_text.size())
|
||||
{
|
||||
QVector<size_t> p;
|
||||
p.append(postingCount);
|
||||
doc.addPosting(QUrl::toPercentEncoding(index_text.at(postingCount)).toStdString(),p);
|
||||
// QVector<size_t> p;
|
||||
// p.append(postingCount);
|
||||
doc.addPosting(QUrl::toPercentEncoding(index_text.at(postingCount)).toStdString(),postingCount);
|
||||
++postingCount;
|
||||
}
|
||||
int i = 0;
|
||||
for (QString& s : pinyin_text_list)
|
||||
{
|
||||
i = 0;
|
||||
while(i < s.size())
|
||||
{
|
||||
doc.addPosting(QUrl::toPercentEncoding(s.at(i)).toStdString(),postingCount);
|
||||
++postingCount;
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
// QMetaObject::invokeMethod(m_indexGenerator,"appendDocListPath",Q_ARG(Document,doc));
|
||||
_mutex_doc_list_path.lock();
|
||||
|
|
|
@ -40,6 +40,16 @@ void Document::addPosting(std::string term,QVector<size_t> offset, int weight)
|
|||
}
|
||||
}
|
||||
|
||||
void Document::addPosting(std::string term, unsigned int offset, int weight)
|
||||
{
|
||||
if(term == "")
|
||||
return;
|
||||
if(term.length() > 240)
|
||||
term = QString::fromStdString(term).left(30).toStdString();
|
||||
|
||||
m_document.add_posting(term,offset,weight);
|
||||
}
|
||||
|
||||
void Document::addTerm(QString term)
|
||||
{
|
||||
if(term.isEmpty())
|
||||
|
|
|
@ -42,6 +42,7 @@ public:
|
|||
}
|
||||
void setData(QString data);
|
||||
void addPosting(std::string term, QVector<size_t> offset, int weight =1);
|
||||
void addPosting(std::string term, unsigned int offset, int weight =1);
|
||||
void addTerm(QString term);
|
||||
void addValue(QString value);
|
||||
void setUniqueTerm(QString term);
|
||||
|
|
|
@ -246,11 +246,11 @@ Xapian::Query FileSearcher::creatQueryForFileSearch(QString keyword, Xapian::Dat
|
|||
// qDebug()<<QString::fromStdString(Xapian::Query(QString(userInput.at(i)).toStdString()).get_description());
|
||||
}
|
||||
Xapian::Query queryPhrase =Xapian::Query(Xapian::Query::OP_PHRASE, v.begin(), v.end());
|
||||
Xapian::Query queryNear =Xapian::Query(Xapian::Query::OP_NEAR, v.begin(), v.end());
|
||||
// Xapian::Query queryNear =Xapian::Query(Xapian::Query::OP_NEAR, v.begin(), v.end());
|
||||
|
||||
Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND,queryNear,queryPhrase);
|
||||
// Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND,queryNear,queryPhrase);
|
||||
// qDebug()<<QString::fromStdString(query.get_description());
|
||||
return query;
|
||||
return queryPhrase;
|
||||
}
|
||||
|
||||
Xapian::Query FileSearcher::creatQueryForContentSearch(QString keyword, Xapian::Database &db)
|
||||
|
|
|
@ -194,17 +194,17 @@ void IndexGenerator::insertIntoDatabase(Document& doc)
|
|||
{
|
||||
// qDebug()<< "--index start--";
|
||||
Xapian::Document document = doc.getXapianDocument();
|
||||
m_indexer.set_document(document);
|
||||
// m_indexer.set_document(document);
|
||||
// qDebug()<<doc.getIndexText();
|
||||
|
||||
for(auto i : doc.getIndexText()){
|
||||
m_indexer.index_text(i.toStdString());
|
||||
}
|
||||
qDebug()<<"insert m_database_path:"<<QString::fromStdString(document.get_data());
|
||||
qDebug()<<"document:"<<QString::fromStdString(document.get_description());
|
||||
// for(auto i : doc.getIndexText()){
|
||||
// m_indexer.index_text(i.toStdString());
|
||||
// }
|
||||
// qDebug()<<"insert m_database_path:"<<QString::fromStdString(document.get_data());
|
||||
// qDebug()<<"document:"<<QString::fromStdString(document.get_description());
|
||||
|
||||
Xapian::docid innerId= m_database_path->replace_document(doc.getUniqueTerm(),document);
|
||||
qDebug()<<"replace doc docid="<<static_cast<int>(innerId);
|
||||
// qDebug()<<"replace doc docid="<<static_cast<int>(innerId);
|
||||
// qDebug()<< "--index finish--";
|
||||
return;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue