Add support for pdf in file parser.
This commit is contained in:
parent
9ec8f32725
commit
d3caad4f56
|
@ -16,7 +16,8 @@ Build-Depends: debhelper (>=9.0.0),
|
|||
libkf5windowsystem-dev,
|
||||
libgsettings-qt-dev,
|
||||
libqt5x11extras5-dev,
|
||||
libuchardet-dev
|
||||
libuchardet-dev,
|
||||
libpoppler-qt5-dev
|
||||
Standards-Version: 4.5.0
|
||||
Homepage: https://www.ukui.org/
|
||||
Vcs-Git: https://github.com/ukui/ukui-search.git
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include <QDomDocument>
|
||||
#include <QQueue>
|
||||
#include "uchardet/uchardet.h"
|
||||
#include "poppler-qt5.h"
|
||||
|
||||
|
||||
size_t FileUtils::_max_index_count = 0;
|
||||
|
@ -619,7 +620,7 @@ void FileUtils::getPptxTextContent(QString &path, QString &textcontent)
|
|||
while(!ar.isNull())
|
||||
{
|
||||
at = ar.firstChildElement("a:t");
|
||||
textcontent.append(at.text().replace("\r","")).replace("\t"," ");
|
||||
textcontent.append(at.text().replace("\r","")).replace("\t","");
|
||||
if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
|
||||
{
|
||||
file.close();
|
||||
|
@ -678,7 +679,7 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent)
|
|||
}
|
||||
if(t.isNull())
|
||||
continue;
|
||||
textcontent.append(t.text().replace("\r","").replace("\n"," "));
|
||||
textcontent.append(t.text().replace("\r","").replace("\n",""));
|
||||
if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
|
||||
{
|
||||
file.close();
|
||||
|
@ -692,6 +693,23 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent)
|
|||
return;
|
||||
}
|
||||
|
||||
/**
 * @brief Appends the text of a PDF file to @p textcontent.
 *
 * Pages are read in order; line feeds inside each page's text are removed
 * before appending. Reading stops once @p textcontent holds at least
 * MAX_CONTENT_LENGTH/3 characters.
 *
 * Nothing is appended when the file cannot be loaded or when the document
 * is locked.
 *
 * @param path        Path of the PDF file to read.
 * @param textcontent Output string; extracted text is appended to it.
 */
void FileUtils::getPdfTextContent(QString &path, QString &textcontent)
{
    Poppler::Document *doc = Poppler::Document::load(path);
    // load() yields a null pointer when the file is missing or is not a
    // valid PDF; dereferencing it would crash the indexer.
    if(!doc)
        return;
    if(doc->isLocked())
    {
        // The document is owned by us even on this early exit.
        delete doc;
        return;
    }
    // A default-constructed (null) rectangle asks for the text of the whole page.
    const QRectF qf;
    int pageNum = doc->numPages();
    for(int i = 0; i<pageNum; ++i)
    {
        // page() hands ownership of the returned object to the caller,
        // so it has to be released after use.
        Poppler::Page *page = doc->page(i);
        if(!page)
            continue;
        textcontent.append(page->text(qf).replace("\n",""));
        delete page;
        if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
            break;
    }
    delete doc;
    return;
}
|
||||
|
||||
void FileUtils::getTxtContent(QString &path, QString &textcontent)
|
||||
{
|
||||
QFile file(path);
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include <QMimeDatabase>
|
||||
#include <QMimeType>
|
||||
#include <QDir>
|
||||
|
||||
#include "libsearch_global.h"
|
||||
//#define INITIAL_STATE 0
|
||||
//#define CREATING_INDEX 1
|
||||
|
@ -67,6 +68,7 @@ public:
|
|||
static void getDocxTextContent(QString &path, QString &textcontent);
|
||||
static void getPptxTextContent(QString &path, QString &textcontent);
|
||||
static void getXlsxTextContent(QString &path, QString &textcontent);
|
||||
static void getPdfTextContent(QString &path, QString &textcontent);
|
||||
static void getTxtContent(QString &path, QString &textcontent);
|
||||
static size_t _max_index_count;
|
||||
static size_t _current_index_count; //this one has been Abandoned,do not use it.
|
||||
|
|
|
@ -55,6 +55,11 @@ void FileReader::getTextContent(QString path, QString &textContent)
|
|||
searchdata.RunParser(path,textContent);
|
||||
}
|
||||
}
|
||||
else if(name == "application/pdf")
|
||||
{
|
||||
if(strsfx.endsWith( "pdf"))
|
||||
FileUtils::getPdfTextContent(path,textContent);
|
||||
}
|
||||
else
|
||||
{
|
||||
qWarning()<<"Unsupport format:["<<path<<"]["<<type.name()<<"]";
|
||||
|
|
|
@ -75,7 +75,8 @@ private:
|
|||
std::map<QString, bool>::value_type("wps", true),
|
||||
std::map<QString, bool>::value_type("pps", true),
|
||||
std::map<QString, bool>::value_type("dps", true),
|
||||
std::map<QString, bool>::value_type("et", true)
|
||||
std::map<QString, bool>::value_type("et", true),
|
||||
std::map<QString, bool>::value_type("pdf", true)
|
||||
};
|
||||
|
||||
//xapian will auto commit per 10,000 changes, do not change it!!!
|
||||
|
|
|
@ -73,7 +73,8 @@ private:
|
|||
std::map<QString, bool>::value_type("wps", true),
|
||||
std::map<QString, bool>::value_type("pps", true),
|
||||
std::map<QString, bool>::value_type("dps", true),
|
||||
std::map<QString, bool>::value_type("et", true)
|
||||
std::map<QString, bool>::value_type("et", true),
|
||||
std::map<QString, bool>::value_type("pdf", true)
|
||||
};
|
||||
|
||||
};
|
||||
|
|
|
@ -5,7 +5,7 @@ TARGET = ukui-search
|
|||
TEMPLATE = lib
|
||||
DEFINES += LIBSEARCH_LIBRARY
|
||||
|
||||
PKGCONFIG += gio-2.0 glib-2.0 gio-unix-2.0 gsettings-qt
|
||||
PKGCONFIG += gio-2.0 glib-2.0 gio-unix-2.0 gsettings-qt poppler-qt5
|
||||
|
||||
CONFIG += c++11 link_pkgconfig no_keywords lrelease
|
||||
|
||||
|
|
Loading…
Reference in New Issue