Merge pull request #181 from iaom/0313-dev
Try to reduce the memory footprint a little bit.
This commit is contained in:
commit
b1304608e7
|
@ -55,14 +55,16 @@ ChineseSegmentation *ChineseSegmentation::getInstance()
|
|||
return global_instance_chinese_segmentation;
|
||||
}
|
||||
|
||||
QVector<SKeyWord> ChineseSegmentation::callSegement(QString& str)
|
||||
QVector<SKeyWord> ChineseSegmentation::callSegement(QString str)
|
||||
{
|
||||
std::string s;
|
||||
s=str.toStdString();
|
||||
str.squeeze();
|
||||
|
||||
const size_t topk = -1;
|
||||
std::vector<cppjieba::KeywordExtractor::Word> keywordres;
|
||||
ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk);
|
||||
std::string().swap(s);
|
||||
QVector<SKeyWord> vecNeeds;
|
||||
convert(keywordres, vecNeeds);
|
||||
|
||||
|
|
|
@ -48,7 +48,7 @@ class CHINESESEGMENTATION_EXPORT ChineseSegmentation
|
|||
public:
|
||||
static ChineseSegmentation *getInstance();
|
||||
~ChineseSegmentation();
|
||||
QVector<SKeyWord> callSegement(QString &str);
|
||||
QVector<SKeyWord> callSegement(QString str);
|
||||
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw);
|
||||
private:
|
||||
static QMutex m_mutex;
|
||||
|
|
|
@ -529,6 +529,11 @@ void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
|
|||
{
|
||||
QDomElement wt = wr.firstChildElement("w:t");
|
||||
textcontent.append(wt.text().replace("\n",""));
|
||||
if(textcontent.length() >= 682666) //20480000/3 -- NOTE(review): 20480000/3 is 6826666, but this literal is 682666; confirm the intended limit
|
||||
{
|
||||
file.close();
|
||||
return;
|
||||
}
|
||||
wr = wr.nextSiblingElement();
|
||||
}
|
||||
wp = wp.nextSiblingElement();
|
||||
|
@ -545,7 +550,7 @@ void FileUtils::getTxtContent(QString &path, QString &textcontent)
|
|||
if(!file.open(QIODevice::ReadOnly|QIODevice::Text))
|
||||
return;
|
||||
|
||||
QByteArray encodedString = file.readAll();
|
||||
QByteArray encodedString = file.read(20480000);
|
||||
|
||||
uchardet_t chardet = uchardet_new();
|
||||
if(uchardet_handle_data(chardet,encodedString.constData(),encodedString.size()) !=0)
|
||||
|
|
|
@ -118,7 +118,7 @@ void ConstructDocumentForContent::run()
|
|||
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
|
||||
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/",0,-2,QString::SectionIncludeLeadingSep)));
|
||||
|
||||
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content);
|
||||
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000));
|
||||
|
||||
Document doc;
|
||||
doc.setData(content);
|
||||
|
@ -135,6 +135,7 @@ void ConstructDocumentForContent::run()
|
|||
_doc_list_content->append(doc);
|
||||
_mutex_doc_list_content.unlock();
|
||||
content.clear();
|
||||
content.squeeze();
|
||||
term.clear();
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -5061,6 +5061,8 @@ bool KBinaryParser::read8DocText(FILE *pFile, const ppsInfoType *pPPS,
|
|||
ushort* usAucData = (ushort*)ptaucBytes;
|
||||
content.append(QString::fromUtf16(usAucData).replace("\r",""));
|
||||
usAucData = (ushort*)xfree((void*)usAucData);
|
||||
if(content.length() >= 682666) //20480000/3 -- NOTE(review): 20480000/3 is 6826666, but this literal is 682666; confirm the intended limit
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue