Merge branch 'ukss' into 'ukss-dev'
文本内容搜索扩展OCR功能; See merge request kylin-desktop/ukui-search!288
This commit is contained in:
commit
324da7cb11
|
@ -13,6 +13,7 @@ static const QString OCR_INDEX_PATH = HOME_PATH + QStringLiteral("/.config/org.u
|
|||
static const QString FILE_SEARCH_VALUE = QStringLiteral("0");
|
||||
static const QString DIR_SEARCH_VALUE = QStringLiteral("1");
|
||||
static const QString INDEX_SEM = QStringLiteral("ukui-search-index-sem");
|
||||
static const int OCR_MIN_SIZE = 200;
|
||||
|
||||
static const QStringList allAppPath = {
|
||||
{HOME_PATH + "/.local/share/applications/"},
|
||||
|
@ -37,6 +38,20 @@ static const QMap<QString, bool> targetFileTypeMap = {
|
|||
|
||||
// Image file suffixes known to the OCR path of the content index.
// Callers test entries with `true == targetPhotographTypeMap[suffix]`, so a
// suffix mapped to false behaves like a suffix that is not listed at all.
// TODO: still incomplete; move this table into a configuration file later.
static const QMap<QString, bool> targetPhotographTypeMap = {
    // Suffixes that are sent to OCR.
    {"bmp", true},
    {"gif", true},
    {"jpe", true},
    {"jpeg", true},
    {"jpg", true},
    {"png", true},
    {"tif", true},
    {"tiff", true},
    {"webp", true},
    // Suffixes that are listed but currently switched off.
    {"dib", false},
    {"hdr", false},
    {"heif", false},
    {"j2k", false},
    {"jng", false},
    {"jp2", false},
    {"psd", false},
    {"xpm", false} // pix read error.
};
|
||||
|
|
|
@ -961,6 +961,28 @@ bool FileUtils::isEncrypedOrUnreadable(QString path)
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Checks whether an image is large enough, in pixels, to be OCR-indexed.
 *
 * The image is loaded with QImage and rejected when either its height or its
 * width is below OCR_MIN_SIZE. An image that cannot be loaded produces a null
 * QImage whose dimensions are 0, so it is rejected as well.
 * (An earlier leptonica pixRead()-based variant of this check was dropped in
 * favour of QImage.)
 *
 * @param path Path of the image file to inspect.
 * @return true when both dimensions are at least OCR_MIN_SIZE, false otherwise.
 */
bool FileUtils::isOcrSupportSize(QString path)
{
    const QImage image(path);

    // Limit by pixel dimensions: height is checked first, then width.
    const bool tooSmall = image.height() < OCR_MIN_SIZE || image.width() < OCR_MIN_SIZE;
    if (tooSmall) {
        qDebug() << "file:" << path << "is not right size.";
    }
    return !tooSmall;
}
|
||||
|
||||
QString FileUtils::getHtmlText(const QString &text, const QString &keyword)
|
||||
{
|
||||
QString htmlString;
|
||||
|
|
|
@ -54,6 +54,8 @@
|
|||
|
||||
#include "libsearch_global.h"
|
||||
#include "common.h"
|
||||
#include <leptonica/allheaders.h>
|
||||
|
||||
//#define INITIAL_STATE 0
|
||||
//#define CREATING_INDEX 1
|
||||
//#define FINISH_CREATING_INDEX 2
|
||||
|
@ -97,6 +99,7 @@ public:
|
|||
static QIcon iconFromTheme(const QString &name, const QIcon &iconDefault);
|
||||
static bool isOpenXMLFileEncrypted(QString &path);
|
||||
static bool isEncrypedOrUnreadable(QString path);
|
||||
static bool isOcrSupportSize(QString path);
|
||||
static size_t maxIndexCount;
|
||||
static unsigned short indexStatus;
|
||||
|
||||
|
|
|
@ -102,12 +102,14 @@ void ConstructDocumentForContent::run() {
|
|||
// qDebug() << "ConstructDocumentForContent currentThreadId()" << QThread::currentThreadId();
|
||||
//构造文本索引的document
|
||||
QString content;
|
||||
FileReader::getTextContent(m_path, content);
|
||||
QString suffix;
|
||||
FileReader::getTextContent(m_path, content, suffix);
|
||||
|
||||
Document doc;
|
||||
doc.setUniqueTerm(FileUtils::makeDocUterm(m_path));
|
||||
doc.addTerm("ZEEKERUPTERM" + FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
|
||||
doc.addValue(1, m_path);
|
||||
doc.addValue(2, suffix);
|
||||
|
||||
if(content.isEmpty()) {
|
||||
doc.reuireDeleted();
|
||||
|
@ -140,12 +142,14 @@ ConstructDocumentForOcr::ConstructDocumentForOcr(QString path)
|
|||
void ConstructDocumentForOcr::run()
|
||||
{
|
||||
QString content;
|
||||
FileReader::getTextContent(m_path, content);
|
||||
QString suffix;
|
||||
FileReader::getTextContent(m_path, content, suffix);
|
||||
|
||||
Document doc;
|
||||
doc.setUniqueTerm(FileUtils::makeDocUterm(m_path));
|
||||
doc.addTerm("ZEEKERUPTERM" + FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
|
||||
doc.addValue(1, m_path);
|
||||
doc.addValue(2, suffix);
|
||||
|
||||
if(content.isEmpty()) {
|
||||
doc.reuireDeleted();
|
||||
|
|
|
@ -25,25 +25,26 @@ using namespace UkuiSearch;
|
|||
FileReader::FileReader(QObject *parent) : QObject(parent) {
|
||||
|
||||
}
|
||||
void FileReader::getTextContent(QString path, QString &textContent) {
|
||||
void FileReader::getTextContent(QString path, QString &textContent, QString &suffix) {
|
||||
QFileInfo file(path);
|
||||
QString strsfx = file.suffix();
|
||||
if (strsfx == "docx") {
|
||||
suffix = file.suffix();
|
||||
|
||||
if (suffix == "docx") {
|
||||
FileUtils::getDocxTextContent(path, textContent);
|
||||
} else if (strsfx == "pptx") {
|
||||
} else if (suffix == "pptx") {
|
||||
FileUtils::getPptxTextContent(path, textContent);
|
||||
} else if (strsfx == "xlsx") {
|
||||
} else if (suffix == "xlsx") {
|
||||
FileUtils::getXlsxTextContent(path, textContent);
|
||||
} else if (strsfx == "txt") {
|
||||
} else if (suffix == "txt") {
|
||||
FileUtils::getTxtContent(path, textContent);
|
||||
} else if (strsfx == "doc" || strsfx == "dot" || strsfx == "wps" || strsfx == "ppt" ||
|
||||
strsfx == "pps" || strsfx == "dps" || strsfx == "et" || strsfx == "xls") {
|
||||
} else if (suffix == "doc" || suffix == "dot" || suffix == "wps" || suffix == "ppt" ||
|
||||
suffix == "pps" || suffix == "dps" || suffix == "et" || suffix == "xls") {
|
||||
KBinaryParser searchdata;
|
||||
searchdata.RunParser(path, textContent);
|
||||
} else if (strsfx == "pdf") {
|
||||
} else if (suffix == "pdf") {
|
||||
FileUtils::getPdfTextContent(path, textContent);
|
||||
} else if (strsfx == "png" || strsfx == "jpg" || strsfx == "jpeg"){
|
||||
OcrObject::getInstance()->getTxtContent(path, textContent);;
|
||||
} else if (true == targetPhotographTypeMap[suffix]){
|
||||
OcrObject::getInstance()->getTxtContent(path, textContent);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -28,7 +28,7 @@ class FileReader : public QObject {
|
|||
public:
|
||||
explicit FileReader(QObject *parent = nullptr);
|
||||
~FileReader() = default;
|
||||
static void getTextContent(QString path, QString &textContent);
|
||||
static void getTextContent(QString path, QString &textContent, QString &suffix);
|
||||
|
||||
};
|
||||
}
|
||||
|
|
|
@ -4,6 +4,9 @@
|
|||
#include <QLabel>
|
||||
#include <QHBoxLayout>
|
||||
#include <QMessageBox>
|
||||
|
||||
#define OCR_ICONLABLE_WITH 352
|
||||
#define OCR_ICONLABLE_HEIGHT 256
|
||||
using namespace UkuiSearch;
|
||||
|
||||
FileSearchPlugin::FileSearchPlugin(QObject *parent) : QObject(parent)
|
||||
|
@ -471,10 +474,27 @@ void FileContengSearchPlugin::openAction(int actionkey, QString key, int type)
|
|||
|
||||
QWidget *FileContengSearchPlugin::detailPage(const ResultInfo &ri)
|
||||
{
|
||||
if (1 == ri.type) {
|
||||
QPixmap pixmap;
|
||||
pixmap.load(ri.actionKey);
|
||||
if (pixmap.width()/OCR_ICONLABLE_WITH > pixmap.height()/OCR_ICONLABLE_HEIGHT) {
|
||||
pixmap = pixmap.scaled(OCR_ICONLABLE_WITH, (pixmap.height()*OCR_ICONLABLE_WITH)/pixmap.width(), Qt::KeepAspectRatio, Qt::SmoothTransformation);
|
||||
} else {
|
||||
pixmap = pixmap.scaled((pixmap.width()*OCR_ICONLABLE_HEIGHT)/pixmap.height(), OCR_ICONLABLE_HEIGHT, Qt::KeepAspectRatio, Qt::SmoothTransformation);
|
||||
}
|
||||
m_iconLabel->setPixmap(pixmap);
|
||||
m_pluginLabel->setText(tr("OCR"));
|
||||
m_detailLyt->setContentsMargins(8, (OCR_ICONLABLE_HEIGHT-pixmap.height())/2+8, 16, 0);
|
||||
m_snippetLabel->hide();
|
||||
} else {
|
||||
m_iconLabel->setPixmap(ri.icon.pixmap(120, 120));
|
||||
m_pluginLabel->setText(tr("File"));
|
||||
m_snippetLabel->setText(getHtmlText(wrapData(m_snippetLabel,ri.description.at(0).value), m_keyWord));
|
||||
m_snippetLabel->show();
|
||||
m_detailLyt->setContentsMargins(8, 50, 16, 0);
|
||||
}
|
||||
m_currentActionKey = ri.actionKey;
|
||||
m_iconLabel->setPixmap(ri.icon.pixmap(120, 120));
|
||||
|
||||
m_pluginLabel->setText(tr("File"));
|
||||
QFontMetrics fontMetrics = m_nameLabel->fontMetrics();
|
||||
QString showname = fontMetrics.elidedText(ri.name, Qt::ElideRight, 215); //当字体长度超过215时显示为省略号
|
||||
m_nameLabel->setText(FileUtils::setAllTextBold(showname));
|
||||
|
@ -484,7 +504,6 @@ QWidget *FileContengSearchPlugin::detailPage(const ResultInfo &ri)
|
|||
m_nameLabel->setToolTip("");
|
||||
}
|
||||
|
||||
m_snippetLabel->setText(getHtmlText(wrapData(m_snippetLabel,ri.description.at(0).value), m_keyWord));
|
||||
m_pathLabel2->setText(m_pathLabel2->fontMetrics().elidedText(m_currentActionKey, Qt::ElideRight, m_pathLabel2->width()));
|
||||
m_pathLabel2->setToolTip(m_currentActionKey);
|
||||
m_timeLabel2->setText(ri.description.at(2).value);
|
||||
|
@ -553,10 +572,10 @@ void FileContengSearchPlugin::initDetailPage()
|
|||
m_detailPage->setFixedWidth(360);
|
||||
m_detailPage->setAttribute(Qt::WA_TranslucentBackground);
|
||||
m_detailLyt = new QVBoxLayout(m_detailPage);
|
||||
m_detailLyt->setContentsMargins(8, 0, 16, 0);
|
||||
m_detailLyt->setContentsMargins(8, 50, 16, 0);
|
||||
m_iconLabel = new QLabel(m_detailPage);
|
||||
m_iconLabel->setAlignment(Qt::AlignCenter);
|
||||
m_iconLabel->setFixedHeight(128);
|
||||
//m_iconLabel->setFixedHeight(128);
|
||||
|
||||
m_nameFrame = new QFrame(m_detailPage);
|
||||
m_nameFrameLyt = new QHBoxLayout(m_nameFrame);
|
||||
|
@ -611,7 +630,7 @@ void FileContengSearchPlugin::initDetailPage()
|
|||
m_actionFrameLyt->addWidget(m_actionLabel3);
|
||||
m_actionFrame->setLayout(m_actionFrameLyt);
|
||||
|
||||
m_detailLyt->addSpacing(50);
|
||||
// m_detailLyt->addSpacing(50);
|
||||
m_detailLyt->addWidget(m_iconLabel);
|
||||
m_detailLyt->addWidget(m_nameFrame);
|
||||
m_detailLyt->addWidget(m_line_1);
|
||||
|
|
|
@ -102,9 +102,12 @@ void FirstIndex::DoSomething(const QFileInfo& fileInfo) {
|
|||
} else {
|
||||
this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size()));
|
||||
}
|
||||
} /*else if (true == targetPhotographTypeMap[fileInfo.fileName().split(".").last()]) {
|
||||
this->m_ocrIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size()));
|
||||
}*/
|
||||
} else if (true == targetPhotographTypeMap[fileInfo.fileName().split(".").last()]) {
|
||||
if (FileUtils::isOcrSupportSize(fileInfo.absoluteFilePath())) {
|
||||
this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size()));
|
||||
//this->m_ocrIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FirstIndex::run() {
|
||||
|
@ -211,7 +214,6 @@ void FirstIndex::run() {
|
|||
}
|
||||
tmp2->enqueue(tempPair.first);
|
||||
}
|
||||
// qDebug() << ">>>>>>>>all fileSize:" << fileSize << "file num:" << tmp->size() << "<<<<<<<<<<<<<<<<<<<";
|
||||
if(!IndexGenerator::getInstance()->creatAllIndex(tmp2)) {
|
||||
sucess = false;
|
||||
break;
|
||||
|
@ -225,7 +227,7 @@ void FirstIndex::run() {
|
|||
}
|
||||
sem.release(2);
|
||||
});
|
||||
// OCR功能暂时屏蔽
|
||||
// OCR功能目前合到内容搜索分类中
|
||||
// QtConcurrent::run(&m_pool,[&]() {
|
||||
// sem.acquire(5);
|
||||
// mutex3.unlock();
|
||||
|
|
|
@ -175,15 +175,15 @@ IndexGenerator::IndexGenerator(QObject *parent) : QObject(parent)
|
|||
if(!database.exists()) {
|
||||
qDebug() << "create content index path" << CONTENT_INDEX_PATH << database.mkpath(CONTENT_INDEX_PATH);
|
||||
}
|
||||
database.setPath(OCR_INDEX_PATH);
|
||||
if(!database.exists()) {
|
||||
qDebug() << "create ocr index path" << OCR_INDEX_PATH << database.mkpath(OCR_INDEX_PATH);
|
||||
}
|
||||
// database.setPath(OCR_INDEX_PATH);
|
||||
// if(!database.exists()) {
|
||||
// qDebug() << "create ocr index path" << OCR_INDEX_PATH << database.mkpath(OCR_INDEX_PATH);
|
||||
// }
|
||||
|
||||
try {
|
||||
m_database_path = new Xapian::WritableDatabase(INDEX_PATH.toStdString(), Xapian::DB_CREATE_OR_OPEN);
|
||||
m_database_content = new Xapian::WritableDatabase(CONTENT_INDEX_PATH.toStdString(), Xapian::DB_CREATE_OR_OPEN);
|
||||
m_database_ocr = new Xapian::WritableDatabase(OCR_INDEX_PATH.toStdString(), Xapian::DB_CREATE_OR_OPEN);
|
||||
// m_database_ocr = new Xapian::WritableDatabase(OCR_INDEX_PATH.toStdString(), Xapian::DB_CREATE_OR_OPEN);
|
||||
} catch(const Xapian::Error &e) {
|
||||
qWarning() << "creat Index fail!" << QString::fromStdString(e.get_description());
|
||||
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1");
|
||||
|
@ -419,7 +419,8 @@ Document IndexGenerator::GenerateContentDocument(const QString &path) {
|
|||
Document doc;
|
||||
QString uniqueterm;
|
||||
QString upTerm;
|
||||
FileReader::getTextContent(path, content);
|
||||
QString suffix;
|
||||
FileReader::getTextContent(path, content, suffix);
|
||||
|
||||
term = ChineseSegmentation::getInstance()->callSegement(content.toStdString());
|
||||
// QStringList term = content.split("");
|
||||
|
@ -428,9 +429,9 @@ Document IndexGenerator::GenerateContentDocument(const QString &path) {
|
|||
doc.setUniqueTerm(uniqueterm);
|
||||
doc.addTerm(upTerm);
|
||||
doc.addValue(1, path);
|
||||
doc.addValue(2, suffix);
|
||||
for(int i = 0; i < term.size(); ++i) {
|
||||
doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast<int>(term.at(i).weight));
|
||||
|
||||
}
|
||||
|
||||
// Document doc;
|
||||
|
@ -528,12 +529,12 @@ bool IndexGenerator::deleteAllIndex(QStringList *pathlist) {
|
|||
|
||||
m_database_path->delete_document(uniqueterm);
|
||||
m_database_content->delete_document(uniqueterm);
|
||||
m_database_ocr->delete_document(uniqueterm);
|
||||
// m_database_ocr->delete_document(uniqueterm);
|
||||
|
||||
//delete all files under it if it's a dir.
|
||||
m_database_path->delete_document(upterm);
|
||||
m_database_content->delete_document(upterm);
|
||||
m_database_ocr->delete_document(upterm);
|
||||
// m_database_ocr->delete_document(upterm);
|
||||
|
||||
qDebug() << "delete path" << doc;
|
||||
// qDebug() << "delete md5" << QString::fromStdString(uniqueterm);
|
||||
|
@ -543,7 +544,7 @@ bool IndexGenerator::deleteAllIndex(QStringList *pathlist) {
|
|||
}
|
||||
m_database_path->commit();
|
||||
m_database_content->commit();
|
||||
m_database_ocr->commit();
|
||||
// m_database_ocr->commit();
|
||||
qDebug() << "--delete finish--";
|
||||
} catch(const Xapian::Error &e) {
|
||||
qWarning() << QString::fromStdString(e.get_description());
|
||||
|
@ -601,7 +602,7 @@ bool IndexGenerator::updateIndex(QVector<PendingFile> *pendingFiles)
|
|||
|
||||
QQueue<QVector<QString>> *fileIndexInfo = new QQueue<QVector<QString>>;
|
||||
QQueue<QString> *fileContentIndexInfo = new QQueue<QString>;
|
||||
QQueue<QString> *fileOcrIndexInfo = new QQueue<QString>;
|
||||
//QQueue<QString> *fileOcrIndexInfo = new QQueue<QString>;
|
||||
QStringList *deleteList = new QStringList;
|
||||
QStringList *contentDeleteList = new QStringList;
|
||||
for (PendingFile file : *pendingFiles) {
|
||||
|
@ -610,12 +611,19 @@ bool IndexGenerator::updateIndex(QVector<PendingFile> *pendingFiles)
|
|||
continue;
|
||||
}
|
||||
fileIndexInfo->append(QVector<QString>() << file.path().section("/" , -1) << file.path() << QString(file.isDir() ? "1" : "0"));
|
||||
if ((!file.path().split(".").isEmpty()) && (true == targetFileTypeMap[file.path().section("/" , -1) .split(".").last()])) {
|
||||
if (file.path().split(".").isEmpty()){
|
||||
continue;
|
||||
}
|
||||
if (true == targetFileTypeMap[file.path().section("/" , -1) .split(".").last()]) {
|
||||
if (!FileUtils::isEncrypedOrUnreadable(file.path())) {
|
||||
fileContentIndexInfo->append(file.path());
|
||||
} else {
|
||||
contentDeleteList->append(file.path());
|
||||
}
|
||||
} else if (true == targetPhotographTypeMap[file.path().section("/" , -1) .split(".").last()]) {
|
||||
if (FileUtils::isOcrSupportSize(file.path())) {
|
||||
fileContentIndexInfo->append(file.path());
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!deleteList->isEmpty()) {
|
||||
|
@ -630,9 +638,9 @@ bool IndexGenerator::updateIndex(QVector<PendingFile> *pendingFiles)
|
|||
if (!fileContentIndexInfo->isEmpty()) {
|
||||
creatAllIndex(fileContentIndexInfo);
|
||||
}
|
||||
if (!fileOcrIndexInfo->isEmpty()) {
|
||||
creatOcrIndex(fileOcrIndexInfo);
|
||||
}
|
||||
//if (!fileOcrIndexInfo->isEmpty()) {
|
||||
// creatOcrIndex(fileOcrIndexInfo);
|
||||
//}
|
||||
if (fileIndexInfo) {
|
||||
delete fileIndexInfo;
|
||||
fileIndexInfo = nullptr;
|
||||
|
@ -641,10 +649,10 @@ bool IndexGenerator::updateIndex(QVector<PendingFile> *pendingFiles)
|
|||
delete fileContentIndexInfo;
|
||||
fileContentIndexInfo = nullptr;
|
||||
}
|
||||
if (fileOcrIndexInfo) {
|
||||
delete fileOcrIndexInfo;
|
||||
fileOcrIndexInfo = nullptr;
|
||||
}
|
||||
//if (fileOcrIndexInfo) {
|
||||
// delete fileOcrIndexInfo;
|
||||
// fileOcrIndexInfo = nullptr;
|
||||
//}
|
||||
if (deleteList) {
|
||||
delete deleteList;
|
||||
deleteList = nullptr;
|
||||
|
|
|
@ -35,7 +35,6 @@
|
|||
#include "file-reader.h"
|
||||
#include "common.h"
|
||||
#include "pending-file.h"
|
||||
#include "common.h"
|
||||
|
||||
namespace UkuiSearch {
|
||||
//extern QVector<Document> *_doc_list_path;
|
||||
|
|
|
@ -13,78 +13,79 @@ OcrObject *OcrObject::getInstance()
|
|||
|
||||
/**
 * @brief Runs OCR on an image file and returns the recognised text.
 *
 * A fresh Tesseract engine (language "chi_sim") is created and destroyed on
 * every call instead of using the m_api member. On any failure (engine
 * initialisation or image loading) the function returns without modifying
 * @p textcontent.
 *
 * @param path        Path of the image to recognise.
 * @param textcontent Receives the recognised text.
 */
void OcrObject::getTxtContent(QString &path, QString &textcontent)
{
    tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
    if (api->Init(NULL, "chi_sim")) {
        qDebug() << "Could not initialize tesseract.\n";
        // Release the engine object on this path too; it used to be leaked.
        delete api;
        return;
    }
    // Images that do not declare a resolution are treated as 1080 dpi.
    api->SetVariable("user_defined_dpi", "1080");

    Pix *image = pixRead(path.toStdString().data());
    if (!image) {
        qDebug() << "path:" << path <<" pixRead error!";
        api->End();
        delete api;
        return;
    }

    api->SetImage(image);

    // GetUTF8Text() hands back a heap buffer owned by the caller; copy it into
    // the QString and free it with delete[] so it is not leaked per image.
    char *utf8Text = api->GetUTF8Text();
    textcontent = QString::fromUtf8(utf8Text);
    delete [] utf8Text;

    pixDestroy(&image);
    api->Clear();
    api->End();
    delete api;
}
|
||||
|
||||
/**
 * @brief Constructs the OCR helper object.
 *
 * The init() call is kept disabled: getTxtContent() creates and destroys its
 * own Tesseract engine on every call.
 *
 * @param parent Parent QObject, forwarded to the QObject base.
 */
OcrObject::OcrObject(QObject *parent) : QObject(parent)
{
    // init();
}
|
||||
|
||||
/**
 * @brief Destroys the OCR helper object.
 *
 * The teardown of the m_api engine is kept disabled; getTxtContent() manages
 * its own engine per call.
 * NOTE(review): confirm nothing else assigns m_api before leaving this
 * cleanup switched off.
 */
OcrObject::~OcrObject()
{
//    if (m_api) {
//        m_api->End();
//        delete m_api;
//        m_api = nullptr;
//    }
}
|
||||
|
||||
void OcrObject::init()
|
||||
|
|
|
@ -68,7 +68,11 @@ bool SearchManager::creatResultInfo(SearchPluginIface::ResultInfo &ri, QString p
|
|||
<< SearchPluginIface::DescriptionInfo{tr("Path:"), path} \
|
||||
<< SearchPluginIface::DescriptionInfo{tr("Modified time:"), info.lastModified().toString("yyyy/MM/dd hh:mm:ss")};
|
||||
ri.actionKey = path;
|
||||
ri.type = 0;
|
||||
if (true == targetPhotographTypeMap[info.suffix()]) {
|
||||
ri.type = 1;//1为ocr图片文件
|
||||
} else {
|
||||
ri.type = 0;//0为默认文本文件
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -257,20 +261,7 @@ int FileContentSearch::keywordSearchContent() {
|
|||
Xapian::QueryParser qp;
|
||||
qp.set_default_op(Xapian::Query::OP_AND);
|
||||
qp.set_database(db);
|
||||
/*
|
||||
::friso::ResultMap ret;
|
||||
::friso::FrisoSegmentation::getInstance()->callSegement(ret, keyword.toLocal8Bit().data());
|
||||
for (::friso::ResultMap::iterator it_map = ret.begin(); it_map != ret.end(); ++it_map){
|
||||
target_str += it_map->first;
|
||||
target_str += " ";
|
||||
it_map->second.first.clear();
|
||||
::std::vector<size_t>().swap(it_map->second.first);
|
||||
}
|
||||
|
||||
ret.clear();
|
||||
ret.erase(ret.begin(), ret.end());
|
||||
::friso::ResultMap().swap(ret);
|
||||
*/
|
||||
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(m_keyword.toStdString());
|
||||
//Creat a query
|
||||
std::string words;
|
||||
|
@ -318,6 +309,7 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord) {
|
|||
double docScoreWeight = it.get_weight();
|
||||
Xapian::percent docScorePercent = it.get_percent();
|
||||
QString path = QString::fromStdString(doc.get_value(1));
|
||||
QString suffix = QString::fromStdString(doc.get_value(2));
|
||||
|
||||
SearchPluginIface::ResultInfo ri;
|
||||
if(!SearchManager::creatResultInfo(ri, path)) {
|
||||
|
|
Loading…
Reference in New Issue