Merge branch '0413-dev' into 'main'

Add support for pdf in file parser.

See merge request kylin-desktop/ukui-search!15
This commit is contained in:
Zihao Zhang 2021-04-15 05:44:30 +00:00
commit b4775df2eb
7 changed files with 34 additions and 6 deletions

3
debian/control vendored
View File

@ -16,7 +16,8 @@ Build-Depends: debhelper (>=9.0.0),
libkf5windowsystem-dev, libkf5windowsystem-dev,
libgsettings-qt-dev, libgsettings-qt-dev,
libqt5x11extras5-dev, libqt5x11extras5-dev,
libuchardet-dev libuchardet-dev,
libpoppler-qt5-dev
Standards-Version: 4.5.0 Standards-Version: 4.5.0
Homepage: https://www.ukui.org/ Homepage: https://www.ukui.org/
Vcs-Git: https://github.com/ukui/ukui-search.git Vcs-Git: https://github.com/ukui/ukui-search.git

View File

@ -30,6 +30,7 @@
#include <QDomDocument> #include <QDomDocument>
#include <QQueue> #include <QQueue>
#include "uchardet/uchardet.h" #include "uchardet/uchardet.h"
#include "poppler-qt5.h"
size_t FileUtils::_max_index_count = 0; size_t FileUtils::_max_index_count = 0;
@ -619,7 +620,7 @@ void FileUtils::getPptxTextContent(QString &path, QString &textcontent)
while(!ar.isNull()) while(!ar.isNull())
{ {
at = ar.firstChildElement("a:t"); at = ar.firstChildElement("a:t");
textcontent.append(at.text().replace("\r","")).replace("\t"," "); textcontent.append(at.text().replace("\r","")).replace("\t","");
if(textcontent.length() >= MAX_CONTENT_LENGTH/3) if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
{ {
file.close(); file.close();
@ -678,7 +679,7 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent)
} }
if(t.isNull()) if(t.isNull())
continue; continue;
textcontent.append(t.text().replace("\r","").replace("\n"," ")); textcontent.append(t.text().replace("\r","").replace("\n",""));
if(textcontent.length() >= MAX_CONTENT_LENGTH/3) if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
{ {
file.close(); file.close();
@ -692,6 +693,23 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent)
return; return;
} }
/**
 * @brief Append the text of a PDF file to @p textcontent.
 *
 * Pages are read in order with newlines stripped from each page's text.
 * Reading stops once @p textcontent has reached MAX_CONTENT_LENGTH/3
 * characters. Nothing is appended when the file cannot be loaded or when
 * the document is locked.
 *
 * @param path        Path of the PDF file to read.
 * @param textcontent Output buffer; extracted text is appended to it.
 */
void FileUtils::getPdfTextContent(QString &path, QString &textcontent)
{
    // load() yields a null pointer when the file cannot be opened or parsed,
    // so it must be checked before any member access.
    Poppler::Document *doc = Poppler::Document::load(path);
    if(doc == nullptr)
        return;

    // A locked document cannot be read; release it before bailing out so the
    // early return does not leak the document.
    if(doc->isLocked())
    {
        delete doc;
        return;
    }

    // A default-constructed (null) rectangle requests the text of the whole page.
    const QRectF qf;
    const int pageNum = doc->numPages();
    for(int i = 0; i < pageNum; ++i)
    {
        // page() hands ownership of the returned object to the caller and may
        // return null for a page that cannot be created.
        Poppler::Page *page = doc->page(i);
        if(page == nullptr)
            continue;

        textcontent.append(page->text(qf).replace("\n",""));
        delete page;

        if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
            break;
    }

    delete doc;
    return;
}
void FileUtils::getTxtContent(QString &path, QString &textcontent) void FileUtils::getTxtContent(QString &path, QString &textcontent)
{ {
QFile file(path); QFile file(path);

View File

@ -35,6 +35,7 @@
#include <QMimeDatabase> #include <QMimeDatabase>
#include <QMimeType> #include <QMimeType>
#include <QDir> #include <QDir>
#include "libsearch_global.h" #include "libsearch_global.h"
//#define INITIAL_STATE 0 //#define INITIAL_STATE 0
//#define CREATING_INDEX 1 //#define CREATING_INDEX 1
@ -67,6 +68,7 @@ public:
static void getDocxTextContent(QString &path, QString &textcontent); static void getDocxTextContent(QString &path, QString &textcontent);
static void getPptxTextContent(QString &path, QString &textcontent); static void getPptxTextContent(QString &path, QString &textcontent);
static void getXlsxTextContent(QString &path, QString &textcontent); static void getXlsxTextContent(QString &path, QString &textcontent);
static void getPdfTextContent(QString &path, QString &textcontent);
static void getTxtContent(QString &path, QString &textcontent); static void getTxtContent(QString &path, QString &textcontent);
static size_t _max_index_count; static size_t _max_index_count;
static size_t _current_index_count; //this one has been Abandoned,do not use it. static size_t _current_index_count; //this one has been Abandoned,do not use it.

View File

@ -55,6 +55,11 @@ void FileReader::getTextContent(QString path, QString &textContent)
searchdata.RunParser(path,textContent); searchdata.RunParser(path,textContent);
} }
} }
else if(name == "application/pdf")
{
if(strsfx.endsWith( "pdf"))
FileUtils::getPdfTextContent(path,textContent);
}
else else
{ {
qWarning()<<"Unsupport format:["<<path<<"]["<<type.name()<<"]"; qWarning()<<"Unsupport format:["<<path<<"]["<<type.name()<<"]";

View File

@ -75,7 +75,8 @@ private:
std::map<QString, bool>::value_type("wps", true), std::map<QString, bool>::value_type("wps", true),
std::map<QString, bool>::value_type("pps", true), std::map<QString, bool>::value_type("pps", true),
std::map<QString, bool>::value_type("dps", true), std::map<QString, bool>::value_type("dps", true),
std::map<QString, bool>::value_type("et", true) std::map<QString, bool>::value_type("et", true),
std::map<QString, bool>::value_type("pdf", true)
}; };
//xapian will auto commit per 10,000 changes, do not change it!!!

View File

@ -73,7 +73,8 @@ private:
std::map<QString, bool>::value_type("wps", true), std::map<QString, bool>::value_type("wps", true),
std::map<QString, bool>::value_type("pps", true), std::map<QString, bool>::value_type("pps", true),
std::map<QString, bool>::value_type("dps", true), std::map<QString, bool>::value_type("dps", true),
std::map<QString, bool>::value_type("et", true) std::map<QString, bool>::value_type("et", true),
std::map<QString, bool>::value_type("pdf", true)
}; };
}; };

View File

@ -5,7 +5,7 @@ TARGET = ukui-search
TEMPLATE = lib TEMPLATE = lib
DEFINES += LIBSEARCH_LIBRARY DEFINES += LIBSEARCH_LIBRARY
PKGCONFIG += gio-2.0 glib-2.0 gio-unix-2.0 gsettings-qt PKGCONFIG += gio-2.0 glib-2.0 gio-unix-2.0 gsettings-qt poppler-qt5
CONFIG += c++11 link_pkgconfig no_keywords lrelease CONFIG += c++11 link_pkgconfig no_keywords lrelease