Try to reduce the memory footprint a little bit.

This commit is contained in:
iaom 2021-03-16 17:21:10 +08:00
parent 941435ed6b
commit 05fe8b5b85
5 changed files with 14 additions and 4 deletions

View File

@ -55,14 +55,16 @@ ChineseSegmentation *ChineseSegmentation::getInstance()
return global_instance_chinese_segmentation; return global_instance_chinese_segmentation;
} }
QVector<SKeyWord> ChineseSegmentation::callSegement(QString& str) QVector<SKeyWord> ChineseSegmentation::callSegement(QString str)
{ {
std::string s; std::string s;
s=str.toStdString(); s=str.toStdString();
str.squeeze();
const size_t topk = -1; const size_t topk = -1;
std::vector<cppjieba::KeywordExtractor::Word> keywordres; std::vector<cppjieba::KeywordExtractor::Word> keywordres;
ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk); ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk);
std::string().swap(s);
QVector<SKeyWord> vecNeeds; QVector<SKeyWord> vecNeeds;
convert(keywordres, vecNeeds); convert(keywordres, vecNeeds);

View File

@ -48,7 +48,7 @@ class CHINESESEGMENTATION_EXPORT ChineseSegmentation
public: public:
static ChineseSegmentation *getInstance(); static ChineseSegmentation *getInstance();
~ChineseSegmentation(); ~ChineseSegmentation();
QVector<SKeyWord> callSegement(QString &str); QVector<SKeyWord> callSegement(QString str);
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw); void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw);
private: private:
static QMutex m_mutex; static QMutex m_mutex;

View File

@ -529,6 +529,11 @@ void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
{ {
QDomElement wt = wr.firstChildElement("w:t"); QDomElement wt = wr.firstChildElement("w:t");
textcontent.append(wt.text().replace("\n","")); textcontent.append(wt.text().replace("\n",""));
if(textcontent.length() >= 682666) //20480000/3
{
file.close();
return;
}
wr = wr.nextSiblingElement(); wr = wr.nextSiblingElement();
} }
wp = wp.nextSiblingElement(); wp = wp.nextSiblingElement();
@ -545,7 +550,7 @@ void FileUtils::getTxtContent(QString &path, QString &textcontent)
if(!file.open(QIODevice::ReadOnly|QIODevice::Text)) if(!file.open(QIODevice::ReadOnly|QIODevice::Text))
return; return;
QByteArray encodedString = file.readAll(); QByteArray encodedString = file.read(20480000);
uchardet_t chardet = uchardet_new(); uchardet_t chardet = uchardet_new();
if(uchardet_handle_data(chardet,encodedString.constData(),encodedString.size()) !=0) if(uchardet_handle_data(chardet,encodedString.constData(),encodedString.size()) !=0)

View File

@ -118,7 +118,7 @@ void ConstructDocumentForContent::run()
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path)); QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/",0,-2,QString::SectionIncludeLeadingSep))); QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/",0,-2,QString::SectionIncludeLeadingSep)));
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content); QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000));
Document doc; Document doc;
doc.setData(content); doc.setData(content);
@ -135,6 +135,7 @@ void ConstructDocumentForContent::run()
_doc_list_content->append(doc); _doc_list_content->append(doc);
_mutex_doc_list_content.unlock(); _mutex_doc_list_content.unlock();
content.clear(); content.clear();
content.squeeze();
term.clear(); term.clear();
return; return;
} }

View File

@ -5061,6 +5061,8 @@ bool KBinaryParser::read8DocText(FILE *pFile, const ppsInfoType *pPPS,
ushort* usAucData = (ushort*)ptaucBytes; ushort* usAucData = (ushort*)ptaucBytes;
content.append(QString::fromUtf16(usAucData).replace("\r","")); content.append(QString::fromUtf16(usAucData).replace("\r",""));
usAucData = (ushort*)xfree((void*)usAucData); usAucData = (ushort*)xfree((void*)usAucData);
if(content.length() >= 682666) //20480000/3
break;
} }
else else
{ {