Merge branch '0413-dev' into 'main'
Add support for pdf in file parser. See merge request kylin-desktop/ukui-search!15
This commit is contained in:
commit
b4775df2eb
|
@ -16,7 +16,8 @@ Build-Depends: debhelper (>=9.0.0),
|
||||||
libkf5windowsystem-dev,
|
libkf5windowsystem-dev,
|
||||||
libgsettings-qt-dev,
|
libgsettings-qt-dev,
|
||||||
libqt5x11extras5-dev,
|
libqt5x11extras5-dev,
|
||||||
libuchardet-dev
|
libuchardet-dev,
|
||||||
|
libpoppler-qt5-dev
|
||||||
Standards-Version: 4.5.0
|
Standards-Version: 4.5.0
|
||||||
Homepage: https://www.ukui.org/
|
Homepage: https://www.ukui.org/
|
||||||
Vcs-Git: https://github.com/ukui/ukui-search.git
|
Vcs-Git: https://github.com/ukui/ukui-search.git
|
||||||
|
|
|
@ -30,6 +30,7 @@
|
||||||
#include <QDomDocument>
|
#include <QDomDocument>
|
||||||
#include <QQueue>
|
#include <QQueue>
|
||||||
#include "uchardet/uchardet.h"
|
#include "uchardet/uchardet.h"
|
||||||
|
#include "poppler-qt5.h"
|
||||||
|
|
||||||
|
|
||||||
size_t FileUtils::_max_index_count = 0;
|
size_t FileUtils::_max_index_count = 0;
|
||||||
|
@ -619,7 +620,7 @@ void FileUtils::getPptxTextContent(QString &path, QString &textcontent)
|
||||||
while(!ar.isNull())
|
while(!ar.isNull())
|
||||||
{
|
{
|
||||||
at = ar.firstChildElement("a:t");
|
at = ar.firstChildElement("a:t");
|
||||||
textcontent.append(at.text().replace("\r","")).replace("\t"," ");
|
textcontent.append(at.text().replace("\r","")).replace("\t","");
|
||||||
if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
|
if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
|
||||||
{
|
{
|
||||||
file.close();
|
file.close();
|
||||||
|
@ -678,7 +679,7 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent)
|
||||||
}
|
}
|
||||||
if(t.isNull())
|
if(t.isNull())
|
||||||
continue;
|
continue;
|
||||||
textcontent.append(t.text().replace("\r","").replace("\n"," "));
|
textcontent.append(t.text().replace("\r","").replace("\n",""));
|
||||||
if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
|
if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
|
||||||
{
|
{
|
||||||
file.close();
|
file.close();
|
||||||
|
@ -692,6 +693,23 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Append the text of a PDF file to textcontent.
 *
 * Pages are read in order and newlines are stripped from each page's text.
 * Reading stops once textcontent reaches MAX_CONTENT_LENGTH/3 characters.
 * Nothing is appended when the file cannot be loaded or the document is
 * locked (password protected).
 *
 * @param path        path of the PDF file to read
 * @param textcontent output buffer the extracted text is appended to
 */
void FileUtils::getPdfTextContent(QString &path, QString &textcontent)
{
    Poppler::Document *doc = Poppler::Document::load(path);
    // load() yields a null pointer when the file cannot be opened or parsed.
    if(!doc)
        return;
    if(doc->isLocked())
    {
        // A locked document has no readable text, but it still has to be released.
        delete doc;
        return;
    }
    const QRectF qf; // null rectangle: ask for the text of the whole page
    int pageNum = doc->numPages();
    for(int i = 0; i<pageNum; ++i)
    {
        // The caller owns the page object returned by page(), so free it after use.
        Poppler::Page *page = doc->page(i);
        if(!page)
            continue;
        textcontent.append(page->text(qf).replace("\n",""));
        delete page;
        if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
            break;
    }
    delete doc;
    return;
}
|
||||||
|
|
||||||
void FileUtils::getTxtContent(QString &path, QString &textcontent)
|
void FileUtils::getTxtContent(QString &path, QString &textcontent)
|
||||||
{
|
{
|
||||||
QFile file(path);
|
QFile file(path);
|
||||||
|
|
|
@ -35,6 +35,7 @@
|
||||||
#include <QMimeDatabase>
|
#include <QMimeDatabase>
|
||||||
#include <QMimeType>
|
#include <QMimeType>
|
||||||
#include <QDir>
|
#include <QDir>
|
||||||
|
|
||||||
#include "libsearch_global.h"
|
#include "libsearch_global.h"
|
||||||
//#define INITIAL_STATE 0
|
//#define INITIAL_STATE 0
|
||||||
//#define CREATING_INDEX 1
|
//#define CREATING_INDEX 1
|
||||||
|
@ -67,6 +68,7 @@ public:
|
||||||
static void getDocxTextContent(QString &path, QString &textcontent);
|
static void getDocxTextContent(QString &path, QString &textcontent);
|
||||||
static void getPptxTextContent(QString &path, QString &textcontent);
|
static void getPptxTextContent(QString &path, QString &textcontent);
|
||||||
static void getXlsxTextContent(QString &path, QString &textcontent);
|
static void getXlsxTextContent(QString &path, QString &textcontent);
|
||||||
|
static void getPdfTextContent(QString &path, QString &textcontent);
|
||||||
static void getTxtContent(QString &path, QString &textcontent);
|
static void getTxtContent(QString &path, QString &textcontent);
|
||||||
static size_t _max_index_count;
|
static size_t _max_index_count;
|
||||||
static size_t _current_index_count; //this one has been Abandoned,do not use it.
|
static size_t _current_index_count; //this one has been Abandoned,do not use it.
|
||||||
|
|
|
@ -55,6 +55,11 @@ void FileReader::getTextContent(QString path, QString &textContent)
|
||||||
searchdata.RunParser(path,textContent);
|
searchdata.RunParser(path,textContent);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if(name == "application/pdf")
|
||||||
|
{
|
||||||
|
if(strsfx.endsWith( "pdf"))
|
||||||
|
FileUtils::getPdfTextContent(path,textContent);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
qWarning()<<"Unsupport format:["<<path<<"]["<<type.name()<<"]";
|
qWarning()<<"Unsupport format:["<<path<<"]["<<type.name()<<"]";
|
||||||
|
|
|
@ -75,7 +75,8 @@ private:
|
||||||
std::map<QString, bool>::value_type("wps", true),
|
std::map<QString, bool>::value_type("wps", true),
|
||||||
std::map<QString, bool>::value_type("pps", true),
|
std::map<QString, bool>::value_type("pps", true),
|
||||||
std::map<QString, bool>::value_type("dps", true),
|
std::map<QString, bool>::value_type("dps", true),
|
||||||
std::map<QString, bool>::value_type("et", true)
|
std::map<QString, bool>::value_type("et", true),
|
||||||
|
std::map<QString, bool>::value_type("pdf", true)
|
||||||
};
|
};
|
||||||
|
|
||||||
//xapian will auto commit per 10,000 changes, donnot change it!!!
|
//xapian will auto commit per 10,000 changes, donnot change it!!!
|
||||||
|
|
|
@ -73,7 +73,8 @@ private:
|
||||||
std::map<QString, bool>::value_type("wps", true),
|
std::map<QString, bool>::value_type("wps", true),
|
||||||
std::map<QString, bool>::value_type("pps", true),
|
std::map<QString, bool>::value_type("pps", true),
|
||||||
std::map<QString, bool>::value_type("dps", true),
|
std::map<QString, bool>::value_type("dps", true),
|
||||||
std::map<QString, bool>::value_type("et", true)
|
std::map<QString, bool>::value_type("et", true),
|
||||||
|
std::map<QString, bool>::value_type("pdf", true)
|
||||||
};
|
};
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
|
@ -5,7 +5,7 @@ TARGET = ukui-search
|
||||||
TEMPLATE = lib
|
TEMPLATE = lib
|
||||||
DEFINES += LIBSEARCH_LIBRARY
|
DEFINES += LIBSEARCH_LIBRARY
|
||||||
|
|
||||||
PKGCONFIG += gio-2.0 glib-2.0 gio-unix-2.0 gsettings-qt
|
PKGCONFIG += gio-2.0 glib-2.0 gio-unix-2.0 gsettings-qt poppler-qt5
|
||||||
|
|
||||||
CONFIG += c++11 link_pkgconfig no_keywords lrelease
|
CONFIG += c++11 link_pkgconfig no_keywords lrelease
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue