Fix: Index process crashed when parsing some WPS templates.
This commit is contained in:
parent
acbc54699a
commit
abce24773c
|
@ -52,10 +52,10 @@ ChineseSegmentation *ChineseSegmentation::getInstance() {
|
||||||
return global_instance_chinese_segmentation;
|
return global_instance_chinese_segmentation;
|
||||||
}
|
}
|
||||||
|
|
||||||
QVector<SKeyWord> ChineseSegmentation::callSegement(QString str) {
|
QVector<SKeyWord> ChineseSegmentation::callSegement(std::string s) {
|
||||||
std::string s;
|
// std::string s;
|
||||||
s = str.toStdString();
|
// s = str.toStdString();
|
||||||
str.squeeze();
|
// str.squeeze();
|
||||||
|
|
||||||
const size_t topk = -1;
|
const size_t topk = -1;
|
||||||
std::vector<cppjieba::KeywordExtractor::Word> keywordres;
|
std::vector<cppjieba::KeywordExtractor::Word> keywordres;
|
||||||
|
|
|
@ -47,7 +47,7 @@ class CHINESESEGMENTATION_EXPORT ChineseSegmentation {
|
||||||
public:
|
public:
|
||||||
static ChineseSegmentation *getInstance();
|
static ChineseSegmentation *getInstance();
|
||||||
~ChineseSegmentation();
|
~ChineseSegmentation();
|
||||||
QVector<SKeyWord> callSegement(QString str);
|
QVector<SKeyWord> callSegement(std::string s);
|
||||||
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres, QVector<SKeyWord>& kw);
|
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres, QVector<SKeyWord>& kw);
|
||||||
private:
|
private:
|
||||||
static QMutex m_mutex;
|
static QMutex m_mutex;
|
||||||
|
|
|
@ -111,7 +111,7 @@ void ConstructDocumentForContent::run() {
|
||||||
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
|
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
|
||||||
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
|
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
|
||||||
|
|
||||||
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000));
|
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000).toStdString());
|
||||||
|
|
||||||
Document doc;
|
Document doc;
|
||||||
doc.setData(content);
|
doc.setData(content);
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
#include "document.h"
|
#include "document.h"
|
||||||
#include <QDebug>
|
#include <QDebug>
|
||||||
using namespace Zeeker;
|
using namespace Zeeker;
|
||||||
void Document::setData(QString data) {
|
void Document::setData(QString &data) {
|
||||||
if(data.isEmpty())
|
if(data.isEmpty())
|
||||||
return;
|
return;
|
||||||
m_document.set_data(data.toStdString());
|
m_document.set_data(data.toStdString());
|
||||||
|
|
|
@ -39,7 +39,7 @@ public:
|
||||||
m_index_text = other.m_index_text;
|
m_index_text = other.m_index_text;
|
||||||
m_unique_term = other.m_unique_term;
|
m_unique_term = other.m_unique_term;
|
||||||
}
|
}
|
||||||
void setData(QString data);
|
void setData(QString &data);
|
||||||
void addPosting(std::string term, QVector<size_t> offset, int weight = 1);
|
void addPosting(std::string term, QVector<size_t> offset, int weight = 1);
|
||||||
void addPosting(std::string term, unsigned int offset, int weight = 1);
|
void addPosting(std::string term, unsigned int offset, int weight = 1);
|
||||||
void addTerm(QString term);
|
void addTerm(QString term);
|
||||||
|
|
|
@ -354,7 +354,7 @@ Document IndexGenerator::GenerateContentDocument(const QString &path) {
|
||||||
QString upTerm;
|
QString upTerm;
|
||||||
FileReader::getTextContent(path, content);
|
FileReader::getTextContent(path, content);
|
||||||
|
|
||||||
term = ChineseSegmentation::getInstance()->callSegement(content);
|
term = ChineseSegmentation::getInstance()->callSegement(content.toStdString());
|
||||||
// QStringList term = content.split("");
|
// QStringList term = content.split("");
|
||||||
|
|
||||||
doc.setData(content);
|
doc.setData(content);
|
||||||
|
|
|
@ -273,7 +273,7 @@ int FileContentSearch::keywordSearchContent() {
|
||||||
ret.erase(ret.begin(), ret.end());
|
ret.erase(ret.begin(), ret.end());
|
||||||
::friso::ResultMap().swap(ret);
|
::friso::ResultMap().swap(ret);
|
||||||
*/
|
*/
|
||||||
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(m_keyword);
|
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(m_keyword.toStdString());
|
||||||
//Creat a query
|
//Creat a query
|
||||||
std::string words;
|
std::string words;
|
||||||
for(int i = 0; i < sKeyWord.size(); i++) {
|
for(int i = 0; i < sKeyWord.size(); i++) {
|
||||||
|
@ -419,6 +419,7 @@ void DirectSearch::run() {
|
||||||
// QDir::Hidden
|
// QDir::Hidden
|
||||||
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
|
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
|
||||||
dir.setSorting(QDir::DirsFirst);
|
dir.setSorting(QDir::DirsFirst);
|
||||||
|
QStringList blockList = GlobalSettings::getInstance()->getBlockDirs();
|
||||||
while(!bfs.empty()) {
|
while(!bfs.empty()) {
|
||||||
dir.setPath(bfs.dequeue());
|
dir.setPath(bfs.dequeue());
|
||||||
list = dir.entryInfoList();
|
list = dir.entryInfoList();
|
||||||
|
@ -426,8 +427,6 @@ void DirectSearch::run() {
|
||||||
if (i.isDir() && (!(i.isSymLink()))) {
|
if (i.isDir() && (!(i.isSymLink()))) {
|
||||||
|
|
||||||
bool findIndex = false;
|
bool findIndex = false;
|
||||||
|
|
||||||
QStringList blockList = GlobalSettings::getInstance()->getBlockDirs();
|
|
||||||
for (QString j : blockList) {
|
for (QString j : blockList) {
|
||||||
if (i.absoluteFilePath().startsWith(j.prepend("/"))) {
|
if (i.absoluteFilePath().startsWith(j.prepend("/"))) {
|
||||||
findIndex = true;
|
findIndex = true;
|
||||||
|
|
|
@ -4457,15 +4457,14 @@ bool bReadBuffer(FILE *pFile, ULONG ulStartBlock,
|
||||||
ULONG ulBegin, ulIndex;
|
ULONG ulBegin, ulIndex;
|
||||||
size_t tLen;
|
size_t tLen;
|
||||||
|
|
||||||
for(ulIndex = ulStartBlock;
|
for(ulIndex = ulStartBlock;ulIndex != END_OF_CHAIN && tToRead != 0;ulIndex = aulBlockDepot[ulIndex]) {
|
||||||
ulIndex != END_OF_CHAIN && tToRead != 0;
|
|
||||||
ulIndex = aulBlockDepot[ulIndex]) {
|
|
||||||
if(ulIndex >= (ULONG)tBlockDepotLen) {
|
if(ulIndex >= (ULONG)tBlockDepotLen) {
|
||||||
if(tBlockSize >= BIG_BLOCK_SIZE) {
|
if(tBlockSize >= BIG_BLOCK_SIZE) {
|
||||||
qWarning() << "The Big Block Depot is damaged";
|
qWarning() << "The Big Block Depot is damaged";
|
||||||
} else {
|
} else {
|
||||||
qWarning() << "The Small Block Depot is damaged";
|
qWarning() << "The Small Block Depot is damaged";
|
||||||
}
|
}
|
||||||
|
return (tToRead == 0);
|
||||||
}
|
}
|
||||||
if(ulOffset >= (ULONG)tBlockSize) {
|
if(ulOffset >= (ULONG)tBlockSize) {
|
||||||
ulOffset -= tBlockSize;
|
ulOffset -= tBlockSize;
|
||||||
|
|
Loading…
Reference in New Issue