// ukui-search/libsearch/index/file-reader.cpp
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
*
*/
// 2020-12-30 15:31:36 +08:00
#include "file-reader.h"
#include <ocr-utils.h>
#include <mime-utils.h>
#include <mutex>
#include "file-extraction-result.h"
#include "common.h"
#include "file-indexer-config.h"
// 2021-12-14 14:43:35 +08:00
using namespace UkuiSearch;
namespace {
// File-local singleton state. It lives in an anonymous namespace so these
// generically named symbols get internal linkage and cannot clash with
// identically named globals defined in other translation units.
FileReader *g_instance = nullptr;
std::once_flag g_instanceFlag;
} // namespace

/**
 * @brief Returns the shared FileReader instance, creating it on first use.
 *
 * Creation goes through std::call_once, so the FileReader is constructed
 * exactly once even if several threads make their first call concurrently.
 * The instance is heap-allocated and is not deleted anywhere in this function.
 *
 * @return Pointer to the single FileReader instance.
 */
FileReader *FileReader::getInstance()
{
    std::call_once(g_instanceFlag, [] {
        g_instance = new FileReader;
    });
    return g_instance;
}
/**
 * @brief Constructs a FileReader; all members use their default initialization.
 */
FileReader::FileReader() = default;
// 2022-10-26 18:01:40 +08:00
void FileReader::getTextContent(const QString &path, QString &textContent, const QString &suffix)
{
if(FileIndexerConfig::getInstance()->ocrContentIndexTarget()[suffix]) {
textContent = UkuiFileMetadata::OcrUtils::getTextInPicture(path);
return;
}
QString mimeType = UkuiFileMetadata::MimeUtils::strictMimeType(path, {}).name();
QList<UkuiFileMetadata::Extractor*> extractors = m_extractorManager.fetchExtractors(mimeType);
FileExtractionResult result(path, mimeType, UkuiFileMetadata::ExtractionResult::Flag::ExtractPlainText);
for(auto extractor : extractors) {
extractor->extract(&result);
if(!result.text().isEmpty()) {
textContent = result.text();
break;
}
2021-03-04 14:10:00 +08:00
}
}