Merge pull request #181 from iaom/0313-dev

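Try to reduce the memory footprint a little bit by capping how much text is read and segmented per file and by releasing temporary string buffers as soon as they are no longer needed.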
This commit is contained in:
iaom 2021-03-16 18:03:41 +08:00 committed by GitHub
commit b1304608e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 14 additions and 4 deletions

View File

@ -55,14 +55,16 @@ ChineseSegmentation *ChineseSegmentation::getInstance()
return global_instance_chinese_segmentation;
}
QVector<SKeyWord> ChineseSegmentation::callSegement(QString& str)
QVector<SKeyWord> ChineseSegmentation::callSegement(QString str)
{
std::string s;
s=str.toStdString();
str.squeeze();
const size_t topk = -1;
std::vector<cppjieba::KeywordExtractor::Word> keywordres;
ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk);
std::string().swap(s);
QVector<SKeyWord> vecNeeds;
convert(keywordres, vecNeeds);

View File

@ -48,7 +48,7 @@ class CHINESESEGMENTATION_EXPORT ChineseSegmentation
public:
static ChineseSegmentation *getInstance();
~ChineseSegmentation();
QVector<SKeyWord> callSegement(QString &str);
QVector<SKeyWord> callSegement(QString str);
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw);
private:
static QMutex m_mutex;

View File

@ -529,6 +529,11 @@ void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
{
QDomElement wt = wr.firstChildElement("w:t");
textcontent.append(wt.text().replace("\n",""));
if(textcontent.length() >= 682666) //20480000/3
{
file.close();
return;
}
wr = wr.nextSiblingElement();
}
wp = wp.nextSiblingElement();
@ -545,7 +550,7 @@ void FileUtils::getTxtContent(QString &path, QString &textcontent)
if(!file.open(QIODevice::ReadOnly|QIODevice::Text))
return;
QByteArray encodedString = file.readAll();
QByteArray encodedString = file.read(20480000);
uchardet_t chardet = uchardet_new();
if(uchardet_handle_data(chardet,encodedString.constData(),encodedString.size()) !=0)

View File

@ -118,7 +118,7 @@ void ConstructDocumentForContent::run()
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/",0,-2,QString::SectionIncludeLeadingSep)));
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content);
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000));
Document doc;
doc.setData(content);
@ -135,6 +135,7 @@ void ConstructDocumentForContent::run()
_doc_list_content->append(doc);
_mutex_doc_list_content.unlock();
content.clear();
content.squeeze();
term.clear();
return;
}

View File

@ -5061,6 +5061,8 @@ bool KBinaryParser::read8DocText(FILE *pFile, const ppsInfoType *pPPS,
ushort* usAucData = (ushort*)ptaucBytes;
content.append(QString::fromUtf16(usAucData).replace("\r",""));
usAucData = (ushort*)xfree((void*)usAucData);
if(content.length() >= 682666) //20480000/3
break;
}
else
{