ukui-search/libchinese-segmentation/chinese-segmentation.h

117 lines
4.3 KiB
C
Raw Normal View History

2022-06-18 10:46:14 +08:00
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef CHINESESEGMENTATION_H
#define CHINESESEGMENTATION_H
2023-03-24 11:33:22 +08:00
#include <QString>
2022-06-18 10:46:14 +08:00
#include "libchinese-segmentation_global.h"
2022-10-21 11:21:35 +08:00
#include "common-struct.h"
2022-06-18 10:46:14 +08:00
2022-10-21 11:21:35 +08:00
class ChineseSegmentationPrivate;
2022-06-18 10:46:14 +08:00
class CHINESESEGMENTATION_EXPORT ChineseSegmentation {
public:
static ChineseSegmentation *getInstance();
2022-10-21 11:21:35 +08:00
/**
* @brief ChineseSegmentation::callSegment
* extractor进行关键词提取使Mix方式初步分词使Idf词典进行关键词提取
*
* @param sentence
* @return vector<KeyWord>
*/
vector<KeyWord> callSegment(const string &sentence);
2023-03-24 11:33:22 +08:00
vector<KeyWord> callSegment(QString &sentence);
2022-10-21 11:21:35 +08:00
/**
* @brief ChineseSegmentation::callMixSegmentCutStr
* 使Mix方法进行分词使MP初步分词HMM进一步分词
*
* @param sentence
* @return vector<string>
*/
vector<string> callMixSegmentCutStr(const string& sentence);
/**
* @brief ChineseSegmentation::callMixSegmentCutWord
* callMixSegmentCutStr功能相同
* @param sentence
* @return vector<Word>
*/
vector<Word> callMixSegmentCutWord(const string& str);
/**
* @brief ChineseSegmentation::lookUpTagOfWord
* word的词性
* @param word
* @return string word的词性
*/
string lookUpTagOfWord(const string& word);
/**
* @brief ChineseSegmentation::getTagOfWordsInSentence
* 使Mix分词后获取每个词的词性
* @param sentence
* @return vector<pair<string, string>> (firsr)(second)
*/
vector<pair<string, string>> getTagOfWordsInSentence(const string &sentence);
/**
* @brief ChineseSegmentation::callFullSegment
* 使Full进行分词Full会切出字典里所有的词
* @param sentence
* @return vector<Word>
*/
vector<Word> callFullSegment(const string& sentence);
/**
* @brief ChineseSegmentation::callQuerySegment
* 使Query进行分词使MixFull
* @param sentence
* @return vector<Word>
*/
vector<Word> callQuerySegment(const string& sentence);
/**
* @brief ChineseSegmentation::callHMMSegment
* 使HMM进行分词
* @param sentence
* @return vector<Word>
*/
vector<Word> callHMMSegment(const string& sentence);
/**
* @brief ChineseSegmentation::callMPSegment
* 使MP进行分词
* @param sentence
* @return vector<Word>
*/
vector<Word> callMPSegment(const string& sentence);
2022-06-18 10:46:14 +08:00
private:
explicit ChineseSegmentation();
2022-10-21 11:21:35 +08:00
~ChineseSegmentation() = default;
ChineseSegmentation(const ChineseSegmentation&) = delete;
ChineseSegmentation& operator =(const ChineseSegmentation&) = delete;
2022-06-18 10:46:14 +08:00
private:
2022-10-21 11:21:35 +08:00
ChineseSegmentationPrivate *d = nullptr;
2022-06-18 10:46:14 +08:00
};
#endif // CHINESESEGMENTATION_H