Try to reduce the memory footprint a little bit.
This commit is contained in:
parent
941435ed6b
commit
05fe8b5b85
|
@ -55,14 +55,16 @@ ChineseSegmentation *ChineseSegmentation::getInstance()
|
||||||
return global_instance_chinese_segmentation;
|
return global_instance_chinese_segmentation;
|
||||||
}
|
}
|
||||||
|
|
||||||
QVector<SKeyWord> ChineseSegmentation::callSegement(QString& str)
|
QVector<SKeyWord> ChineseSegmentation::callSegement(QString str)
|
||||||
{
|
{
|
||||||
std::string s;
|
std::string s;
|
||||||
s=str.toStdString();
|
s=str.toStdString();
|
||||||
|
str.squeeze();
|
||||||
|
|
||||||
const size_t topk = -1;
|
const size_t topk = -1;
|
||||||
std::vector<cppjieba::KeywordExtractor::Word> keywordres;
|
std::vector<cppjieba::KeywordExtractor::Word> keywordres;
|
||||||
ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk);
|
ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk);
|
||||||
|
std::string().swap(s);
|
||||||
QVector<SKeyWord> vecNeeds;
|
QVector<SKeyWord> vecNeeds;
|
||||||
convert(keywordres, vecNeeds);
|
convert(keywordres, vecNeeds);
|
||||||
|
|
||||||
|
|
|
@ -48,7 +48,7 @@ class CHINESESEGMENTATION_EXPORT ChineseSegmentation
|
||||||
public:
|
public:
|
||||||
static ChineseSegmentation *getInstance();
|
static ChineseSegmentation *getInstance();
|
||||||
~ChineseSegmentation();
|
~ChineseSegmentation();
|
||||||
QVector<SKeyWord> callSegement(QString &str);
|
QVector<SKeyWord> callSegement(QString str);
|
||||||
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw);
|
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw);
|
||||||
private:
|
private:
|
||||||
static QMutex m_mutex;
|
static QMutex m_mutex;
|
||||||
|
|
|
@ -529,6 +529,11 @@ void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
|
||||||
{
|
{
|
||||||
QDomElement wt = wr.firstChildElement("w:t");
|
QDomElement wt = wr.firstChildElement("w:t");
|
||||||
textcontent.append(wt.text().replace("\n",""));
|
textcontent.append(wt.text().replace("\n",""));
|
||||||
|
if(textcontent.length() >= 682666) //20480000/3
|
||||||
|
{
|
||||||
|
file.close();
|
||||||
|
return;
|
||||||
|
}
|
||||||
wr = wr.nextSiblingElement();
|
wr = wr.nextSiblingElement();
|
||||||
}
|
}
|
||||||
wp = wp.nextSiblingElement();
|
wp = wp.nextSiblingElement();
|
||||||
|
@ -545,7 +550,7 @@ void FileUtils::getTxtContent(QString &path, QString &textcontent)
|
||||||
if(!file.open(QIODevice::ReadOnly|QIODevice::Text))
|
if(!file.open(QIODevice::ReadOnly|QIODevice::Text))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
QByteArray encodedString = file.readAll();
|
QByteArray encodedString = file.read(20480000);
|
||||||
|
|
||||||
uchardet_t chardet = uchardet_new();
|
uchardet_t chardet = uchardet_new();
|
||||||
if(uchardet_handle_data(chardet,encodedString.constData(),encodedString.size()) !=0)
|
if(uchardet_handle_data(chardet,encodedString.constData(),encodedString.size()) !=0)
|
||||||
|
|
|
@ -118,7 +118,7 @@ void ConstructDocumentForContent::run()
|
||||||
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
|
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
|
||||||
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/",0,-2,QString::SectionIncludeLeadingSep)));
|
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/",0,-2,QString::SectionIncludeLeadingSep)));
|
||||||
|
|
||||||
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content);
|
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000));
|
||||||
|
|
||||||
Document doc;
|
Document doc;
|
||||||
doc.setData(content);
|
doc.setData(content);
|
||||||
|
@ -135,6 +135,7 @@ void ConstructDocumentForContent::run()
|
||||||
_doc_list_content->append(doc);
|
_doc_list_content->append(doc);
|
||||||
_mutex_doc_list_content.unlock();
|
_mutex_doc_list_content.unlock();
|
||||||
content.clear();
|
content.clear();
|
||||||
|
content.squeeze();
|
||||||
term.clear();
|
term.clear();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -5061,6 +5061,8 @@ bool KBinaryParser::read8DocText(FILE *pFile, const ppsInfoType *pPPS,
|
||||||
ushort* usAucData = (ushort*)ptaucBytes;
|
ushort* usAucData = (ushort*)ptaucBytes;
|
||||||
content.append(QString::fromUtf16(usAucData).replace("\r",""));
|
content.append(QString::fromUtf16(usAucData).replace("\r",""));
|
||||||
usAucData = (ushort*)xfree((void*)usAucData);
|
usAucData = (ushort*)xfree((void*)usAucData);
|
||||||
|
if(content.length() >= 682666) //20480000/3
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue