Merge pull request #95 from iaom/0119-dev
Optimized docx parsing method;
This commit is contained in:
commit
9765bd0cd4
|
@ -16,6 +16,11 @@ struct SKeyWord{
|
||||||
std::string word;
|
std::string word;
|
||||||
QVector<size_t> offsets;
|
QVector<size_t> offsets;
|
||||||
double weight;
|
double weight;
|
||||||
|
~SKeyWord(){
|
||||||
|
word = std::move("");
|
||||||
|
offsets.clear();
|
||||||
|
offsets.shrink_to_fit();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class CHINESESEGMENTATION_EXPORT ChineseSegmentation
|
class CHINESESEGMENTATION_EXPORT ChineseSegmentation
|
||||||
|
|
|
@ -493,18 +493,24 @@ void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
|
||||||
QDomDocument doc;
|
QDomDocument doc;
|
||||||
doc.setContent(fileR.readAll());
|
doc.setContent(fileR.readAll());
|
||||||
QDomElement first = doc.firstChildElement("w:document");
|
QDomElement first = doc.firstChildElement("w:document");
|
||||||
first = first.firstChildElement().firstChildElement();
|
QDomElement body = first.firstChildElement("w:body");
|
||||||
while(!first.isNull())
|
while(!body.isNull())
|
||||||
{
|
{
|
||||||
QDomElement wr= first.firstChildElement("w:r");
|
QDomElement wp= body.firstChildElement("w:p");
|
||||||
|
while(!wp.isNull())
|
||||||
|
{
|
||||||
|
QDomElement wr= wp.firstChildElement("w:r");
|
||||||
while(!wr.isNull())
|
while(!wr.isNull())
|
||||||
{
|
{
|
||||||
QDomElement wt = wr.firstChildElement("w:t");
|
QDomElement wt = wr.firstChildElement("w:t");
|
||||||
textcontent.append(wt.text().replace("\n",""));
|
textcontent.append(wt.text().replace("\n",""));
|
||||||
wr = wr.nextSiblingElement();
|
wr = wr.nextSiblingElement();
|
||||||
}
|
}
|
||||||
first = first.nextSiblingElement();
|
wp = wp.nextSiblingElement();
|
||||||
}
|
}
|
||||||
|
body = body.nextSiblingElement();
|
||||||
|
}
|
||||||
|
file.close();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -72,7 +72,7 @@ ConstructDocumentForContent::~ConstructDocumentForContent()
|
||||||
|
|
||||||
void ConstructDocumentForContent::run()
|
void ConstructDocumentForContent::run()
|
||||||
{
|
{
|
||||||
qDebug() << "ConstructDocumentForContent currentThreadId()" << QThread::currentThreadId();
|
// qDebug() << "ConstructDocumentForContent currentThreadId()" << QThread::currentThreadId();
|
||||||
// 构造文本索引的document
|
// 构造文本索引的document
|
||||||
if (!_doc_list_content)
|
if (!_doc_list_content)
|
||||||
_doc_list_content = new QList<Document>;
|
_doc_list_content = new QList<Document>;
|
||||||
|
|
|
@ -97,7 +97,7 @@ void FileSearcher::onKeywordSearch(QString keyword,QQueue<QString> *searchResult
|
||||||
|
|
||||||
while(total<20)
|
while(total<20)
|
||||||
{
|
{
|
||||||
keywordSearchContent(uniqueSymbol3,keyword,begin,num);
|
resultCount = keywordSearchContent(uniqueSymbol3,keyword,begin,num);
|
||||||
if(resultCount == 0 || resultCount == -1)
|
if(resultCount == 0 || resultCount == -1)
|
||||||
break;
|
break;
|
||||||
total += resultCount;
|
total += resultCount;
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
#include <QtConcurrent>
|
#include <QtConcurrent>
|
||||||
#include <QFuture>
|
#include <QFuture>
|
||||||
#include <QThreadPool>
|
#include <QThreadPool>
|
||||||
|
#include <QFile>
|
||||||
#include "file-utils.h"
|
#include "file-utils.h"
|
||||||
#include "index-generator.h"
|
#include "index-generator.h"
|
||||||
#include "global-settings.h"
|
#include "global-settings.h"
|
||||||
|
@ -113,6 +114,13 @@ IndexGenerator::IndexGenerator(bool rebuild, QObject *parent) : QObject(parent)
|
||||||
{
|
{
|
||||||
if(rebuild)
|
if(rebuild)
|
||||||
{
|
{
|
||||||
|
QDir database(QString::fromStdString(INDEX_PATH));
|
||||||
|
if(database.exists())
|
||||||
|
database.removeRecursively();
|
||||||
|
database.setPath(QString::fromStdString(CONTENT_INDEX_PATH));
|
||||||
|
if(database.exists())
|
||||||
|
database.removeRecursively();
|
||||||
|
|
||||||
m_database_path = new Xapian::WritableDatabase(INDEX_PATH, Xapian::DB_CREATE_OR_OVERWRITE);
|
m_database_path = new Xapian::WritableDatabase(INDEX_PATH, Xapian::DB_CREATE_OR_OVERWRITE);
|
||||||
m_database_content = new Xapian::WritableDatabase(CONTENT_INDEX_PATH, Xapian::DB_CREATE_OR_OVERWRITE);
|
m_database_content = new Xapian::WritableDatabase(CONTENT_INDEX_PATH, Xapian::DB_CREATE_OR_OVERWRITE);
|
||||||
}
|
}
|
||||||
|
@ -198,14 +206,13 @@ void IndexGenerator::HandlePathList(QQueue<QVector<QString>> *messageList)
|
||||||
// m_doc_list_path = new QList<Document>(docList);
|
// m_doc_list_path = new QList<Document>(docList);
|
||||||
QThreadPool pool;
|
QThreadPool pool;
|
||||||
// pool.setMaxThreadCount(1);
|
// pool.setMaxThreadCount(1);
|
||||||
|
pool.setExpiryTimeout(100);
|
||||||
ConstructDocumentForPath *constructer;
|
ConstructDocumentForPath *constructer;
|
||||||
while(!messageList->isEmpty())
|
while(!messageList->isEmpty())
|
||||||
{
|
{
|
||||||
constructer = new ConstructDocumentForPath(messageList->dequeue());
|
constructer = new ConstructDocumentForPath(messageList->dequeue());
|
||||||
pool.start(constructer);
|
pool.start(constructer);
|
||||||
}
|
}
|
||||||
// while(!pool.waitForDone(1))
|
|
||||||
// qDebug()<<"fuck"<<pool.waitForDone(1);
|
|
||||||
qDebug()<<"pool finish"<<pool.waitForDone(-1);
|
qDebug()<<"pool finish"<<pool.waitForDone(-1);
|
||||||
// if(constructer)
|
// if(constructer)
|
||||||
// delete constructer;
|
// delete constructer;
|
||||||
|
@ -231,14 +238,12 @@ void IndexGenerator::HandlePathList(QQueue<QString> *messageList)
|
||||||
ConstructDocumentForContent *constructer;
|
ConstructDocumentForContent *constructer;
|
||||||
QThreadPool pool;
|
QThreadPool pool;
|
||||||
// pool.setMaxThreadCount(2);
|
// pool.setMaxThreadCount(2);
|
||||||
pool.setExpiryTimeout(1000);
|
pool.setExpiryTimeout(100);
|
||||||
while(!messageList->isEmpty())
|
while(!messageList->isEmpty())
|
||||||
{
|
{
|
||||||
constructer = new ConstructDocumentForContent(messageList->dequeue());
|
constructer = new ConstructDocumentForContent(messageList->dequeue());
|
||||||
pool.start(constructer);
|
pool.start(constructer);
|
||||||
}
|
}
|
||||||
// while(!pool.waitForDone(1))
|
|
||||||
// qDebug()<<"fuck"<<pool.waitForDone(1);
|
|
||||||
qDebug()<<"pool finish"<<pool.waitForDone(-1);
|
qDebug()<<"pool finish"<<pool.waitForDone(-1);
|
||||||
// if(constructer)
|
// if(constructer)
|
||||||
// delete constructer;
|
// delete constructer;
|
||||||
|
|
Loading…
Reference in New Issue