Merge pull request #95 from iaom/0119-dev
Optimized the docx parsing method.
This commit is contained in:
commit
9765bd0cd4
|
@ -16,6 +16,11 @@ struct SKeyWord{
|
|||
std::string word;
|
||||
QVector<size_t> offsets;
|
||||
double weight;
|
||||
/// Destroys the keyword record.
///
/// `word` and `offsets` release their own storage when they are destroyed,
/// so there is no manual cleanup to perform here. Assigning an empty string
/// and clearing/shrinking the vector right before destruction only added
/// work without freeing anything earlier.
///
/// NOTE(review): any declared destructor, even a defaulted one, suppresses
/// the implicitly generated move operations. If nothing depends on this
/// declaration, consider removing it entirely (Rule of Zero).
~SKeyWord() = default;
|
||||
};
|
||||
|
||||
class CHINESESEGMENTATION_EXPORT ChineseSegmentation
|
||||
|
|
|
@ -493,18 +493,24 @@ void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
|
|||
QDomDocument doc;
|
||||
doc.setContent(fileR.readAll());
|
||||
QDomElement first = doc.firstChildElement("w:document");
|
||||
first = first.firstChildElement().firstChildElement();
|
||||
while(!first.isNull())
|
||||
QDomElement body = first.firstChildElement("w:body");
|
||||
while(!body.isNull())
|
||||
{
|
||||
QDomElement wr= first.firstChildElement("w:r");
|
||||
QDomElement wp= body.firstChildElement("w:p");
|
||||
while(!wp.isNull())
|
||||
{
|
||||
QDomElement wr= wp.firstChildElement("w:r");
|
||||
while(!wr.isNull())
|
||||
{
|
||||
QDomElement wt = wr.firstChildElement("w:t");
|
||||
textcontent.append(wt.text().replace("\n",""));
|
||||
wr = wr.nextSiblingElement();
|
||||
}
|
||||
first = first.nextSiblingElement();
|
||||
wp = wp.nextSiblingElement();
|
||||
}
|
||||
body = body.nextSiblingElement();
|
||||
}
|
||||
file.close();
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -72,7 +72,7 @@ ConstructDocumentForContent::~ConstructDocumentForContent()
|
|||
|
||||
void ConstructDocumentForContent::run()
|
||||
{
|
||||
qDebug() << "ConstructDocumentForContent currentThreadId()" << QThread::currentThreadId();
|
||||
// qDebug() << "ConstructDocumentForContent currentThreadId()" << QThread::currentThreadId();
|
||||
// 构造文本索引的document
|
||||
if (!_doc_list_content)
|
||||
_doc_list_content = new QList<Document>;
|
||||
|
|
|
@ -97,7 +97,7 @@ void FileSearcher::onKeywordSearch(QString keyword,QQueue<QString> *searchResult
|
|||
|
||||
while(total<20)
|
||||
{
|
||||
keywordSearchContent(uniqueSymbol3,keyword,begin,num);
|
||||
resultCount = keywordSearchContent(uniqueSymbol3,keyword,begin,num);
|
||||
if(resultCount == 0 || resultCount == -1)
|
||||
break;
|
||||
total += resultCount;
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include <QtConcurrent>
|
||||
#include <QFuture>
|
||||
#include <QThreadPool>
|
||||
#include <QFile>
|
||||
#include "file-utils.h"
|
||||
#include "index-generator.h"
|
||||
#include "global-settings.h"
|
||||
|
@ -113,6 +114,13 @@ IndexGenerator::IndexGenerator(bool rebuild, QObject *parent) : QObject(parent)
|
|||
{
|
||||
if(rebuild)
|
||||
{
|
||||
QDir database(QString::fromStdString(INDEX_PATH));
|
||||
if(database.exists())
|
||||
database.removeRecursively();
|
||||
database.setPath(QString::fromStdString(CONTENT_INDEX_PATH));
|
||||
if(database.exists())
|
||||
database.removeRecursively();
|
||||
|
||||
m_database_path = new Xapian::WritableDatabase(INDEX_PATH, Xapian::DB_CREATE_OR_OVERWRITE);
|
||||
m_database_content = new Xapian::WritableDatabase(CONTENT_INDEX_PATH, Xapian::DB_CREATE_OR_OVERWRITE);
|
||||
}
|
||||
|
@ -198,14 +206,13 @@ void IndexGenerator::HandlePathList(QQueue<QVector<QString>> *messageList)
|
|||
// m_doc_list_path = new QList<Document>(docList);
|
||||
QThreadPool pool;
|
||||
// pool.setMaxThreadCount(1);
|
||||
pool.setExpiryTimeout(100);
|
||||
ConstructDocumentForPath *constructer;
|
||||
while(!messageList->isEmpty())
|
||||
{
|
||||
constructer = new ConstructDocumentForPath(messageList->dequeue());
|
||||
pool.start(constructer);
|
||||
}
|
||||
// while(!pool.waitForDone(1))
|
||||
// qDebug()<<"fuck"<<pool.waitForDone(1);
|
||||
qDebug()<<"pool finish"<<pool.waitForDone(-1);
|
||||
// if(constructer)
|
||||
// delete constructer;
|
||||
|
@ -231,14 +238,12 @@ void IndexGenerator::HandlePathList(QQueue<QString> *messageList)
|
|||
ConstructDocumentForContent *constructer;
|
||||
QThreadPool pool;
|
||||
// pool.setMaxThreadCount(2);
|
||||
pool.setExpiryTimeout(1000);
|
||||
pool.setExpiryTimeout(100);
|
||||
while(!messageList->isEmpty())
|
||||
{
|
||||
constructer = new ConstructDocumentForContent(messageList->dequeue());
|
||||
pool.start(constructer);
|
||||
}
|
||||
// while(!pool.waitForDone(1))
|
||||
// qDebug()<<"fuck"<<pool.waitForDone(1);
|
||||
qDebug()<<"pool finish"<<pool.waitForDone(-1);
|
||||
// if(constructer)
|
||||
// delete constructer;
|
||||
|
|
Loading…
Reference in New Issue