408 lines
11 KiB
C++
408 lines
11 KiB
C++
/*
|
|
* The MIT License (MIT)
|
|
*
|
|
* Copyright (C) 2013 Yanyi Wu
|
|
* Copyright (C) 2023, KylinSoft Co., Ltd.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be
|
|
* included in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
*
|
|
*/
|
|
/************************************
|
|
* file enc : ascii
|
|
* author : wuyanyi09@gmail.com
|
|
************************************/
|
|
#ifndef LIMONP_STR_FUNCTS_H
|
|
#define LIMONP_STR_FUNCTS_H
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdarg.h>
|
|
#include <memory.h>
|
|
#include <sys/types.h>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <algorithm>
|
|
#include <cctype>
|
|
#include <map>
|
|
#include <functional>
|
|
#include <locale>
|
|
#include <sstream>
|
|
#include <iterator>
|
|
#include <algorithm>
|
|
#include "StdExtension.hpp"
|
|
|
|
namespace limonp {
|
|
using namespace std;
|
|
// Formats a printf-style message into a std::string.
//
// fmt: printf-style format string; the variadic arguments must match it.
// Returns the formatted text. Returns an empty string when fmt is null or
// when vsnprintf keeps reporting failure until the retry buffer reaches
// 16 MiB.
inline std::string StringFormat(const char* fmt, ...) {
  if (!fmt) {
    return std::string();
  }
  // Ceiling for the blind doubling below. Without it, a persistent negative
  // return would grow the buffer (and eventually overflow `size`) forever.
  const int kMaxBlindSize = 1 << 24;
  int size = 256;
  std::string str;
  for (;;) {
    str.resize(static_cast<size_t>(size));
    va_list ap;
    va_start(ap, fmt);
    // &str[0] is the string's own writable storage; the pointer returned by
    // c_str() must not be written through.
    const int n = vsnprintf(&str[0], static_cast<size_t>(size), fmt, ap);
    va_end(ap);
    if (n >= 0 && n < size) {
      str.resize(static_cast<size_t>(n));
      return str;
    }
    if (n >= 0) {
      // vsnprintf reported the exact length needed (excluding the NUL).
      size = n + 1;
    } else if (size < kMaxBlindSize) {
      // Negative result: some legacy implementations use it for truncation,
      // so retry with a bigger buffer, but only up to the ceiling.
      size *= 2;
    } else {
      return std::string();
    }
  }
}
|
|
|
|
// Joins the elements of [begin, end) into res, placing connector between
// consecutive elements. Elements are rendered with operator<< on a
// std::stringstream.
//
// res is always overwritten: it receives the joined text, or becomes empty
// when the range is empty (previously stale content was left in place).
template<class T>
void Join(T begin, T end, std::string& res, const std::string& connector) {
  if (begin == end) {
    res.clear();
    return;
  }
  std::stringstream ss;
  ss << *begin;
  for (++begin; begin != end; ++begin) {
    ss << connector << *begin;
  }
  res = ss.str();
}
|
|
|
|
// Convenience form of Join: forwards to the four-argument overload with a
// local string as the output and returns that string by value.
template<class T>
std::string Join(T begin, T end, const std::string& connector) {
  std::string joined;
  Join(begin, end, joined, connector);
  return joined;
}
|
|
|
|
// Converts str to upper case in place with std::toupper and returns a
// reference to the same string.
inline std::string& Upper(std::string& str) {
  for (std::string::size_type i = 0; i < str.size(); ++i) {
    // toupper is only defined for values representable as unsigned char (or
    // EOF); a plain char may be negative for bytes >= 0x80, so widen through
    // unsigned char first.
    const unsigned char byte = static_cast<unsigned char>(str[i]);
    str[i] = static_cast<char>(std::toupper(byte));
  }
  return str;
}
|
|
|
|
// Converts str to lower case in place with std::tolower and returns a
// reference to the same string.
inline std::string& Lower(std::string& str) {
  for (std::string::size_type i = 0; i < str.size(); ++i) {
    // tolower is only defined for values representable as unsigned char (or
    // EOF); a plain char may be negative for bytes >= 0x80, so widen through
    // unsigned char first.
    const unsigned char byte = static_cast<unsigned char>(str[i]);
    str[i] = static_cast<char>(std::tolower(byte));
  }
  return str;
}
|
|
|
|
// Reports whether c is a whitespace character according to std::isspace.
// Values above 0xff are rejected up front and never reach isspace, because
// handing it a large integer crashed.
inline bool IsSpace(unsigned c) {
  if (c > 0xff) {
    return false;
  }
  return std::isspace(static_cast<int>(c & 0xff)) != 0;
}
|
|
|
|
// Removes leading characters for which IsSpace() is true, in place, and
// returns a reference to s.
//
// Written as a plain loop: std::ptr_fun and std::not1, used previously, were
// removed from the standard library (C++17 / C++20).
inline std::string& LTrim(std::string &s) {
  std::string::size_type skip = 0;
  // s[skip] is passed as-is so it converts to IsSpace's unsigned parameter
  // exactly as it did through the old function adaptor.
  while (skip < s.size() && IsSpace(s[skip])) {
    ++skip;
  }
  s.erase(0, skip);
  return s;
}
|
|
|
|
// Removes trailing characters for which IsSpace() is true, in place, and
// returns a reference to s.
//
// Written as a plain loop: std::ptr_fun and std::not1, used previously, were
// removed from the standard library (C++17 / C++20).
inline std::string& RTrim(std::string &s) {
  std::string::size_type keep = s.size();
  // s[keep - 1] is passed as-is so it converts to IsSpace's unsigned
  // parameter exactly as it did through the old function adaptor.
  while (keep > 0 && IsSpace(s[keep - 1])) {
    --keep;
  }
  s.erase(keep);
  return s;
}
|
|
|
|
inline std::string& Trim(std::string &s) {
|
|
return LTrim(RTrim(s));
|
|
}
|
|
|
|
// Removes every leading occurrence of the character x from s, in place, and
// returns a reference to s.
//
// Uses std::string::find_first_not_of instead of std::bind2nd / std::not1,
// which were removed from the standard library (C++17 / C++20).
inline std::string& LTrim(std::string & s, char x) {
  // When s holds nothing but x (or is empty) the search yields npos, and
  // erase(0, npos) empties the string.
  s.erase(0, s.find_first_not_of(x));
  return s;
}
|
|
|
|
// Removes every trailing occurrence of the character x from s, in place, and
// returns a reference to s.
//
// Uses std::string::find_last_not_of instead of std::bind2nd / std::not1,
// which were removed from the standard library (C++17 / C++20).
inline std::string& RTrim(std::string & s, char x) {
  const std::string::size_type last = s.find_last_not_of(x);
  if (last == std::string::npos) {
    // Nothing but x (or empty): the whole string goes.
    s.clear();
  } else {
    s.erase(last + 1);
  }
  return s;
}
|
|
|
|
inline std::string& Trim(std::string &s, char x) {
|
|
return LTrim(RTrim(s, x), x);
|
|
}
|
|
|
|
// Splits src into res at every character that appears in pattern (pattern is
// treated as a set of delimiter characters, via find_first_of).
//
// res is cleared first. Once res already holds maxsplit pieces, the rest of
// src is appended as one final piece. A delimiter at the very end of src does
// not produce a trailing empty piece, and an empty src yields an empty res.
inline void Split(const std::string& src, std::vector<std::string>& res, const std::string& pattern, size_t maxsplit = std::string::npos) {
  res.clear();
  for (size_t cursor = 0; cursor < src.size();) {
    const size_t hit = src.find_first_of(pattern, cursor);
    const bool take_rest = (hit == std::string::npos) || (res.size() >= maxsplit);
    if (take_rest) {
      res.push_back(src.substr(cursor));
      return;
    }
    res.push_back(src.substr(cursor, hit - cursor));
    cursor = hit + 1;
  }
}
|
|
|
|
inline vector<string> Split(const string& src, const string& pattern, size_t maxsplit = string::npos) {
|
|
vector<string> res;
|
|
Split(src, res, pattern, maxsplit);
|
|
return res;
|
|
}
|
|
|
|
// Returns true when str begins with prefix. A prefix longer than str never
// matches; an empty prefix always matches.
inline bool StartsWith(const std::string& str, const std::string& prefix) {
  const std::string::size_type want = prefix.length();
  return want <= str.length() && str.compare(0, want, prefix) == 0;
}
|
|
|
|
// Returns true when str ends with suffix. A suffix longer than str never
// matches; an empty suffix always matches.
inline bool EndsWith(const std::string& str, const std::string& suffix) {
  const std::string::size_type want = suffix.length();
  return want <= str.length() &&
         str.compare(str.length() - want, want, suffix) == 0;
}
|
|
|
|
// Returns true when the character ch occurs anywhere in str.
inline bool IsInStr(const std::string& str, char ch) {
  return std::find(str.begin(), str.end(), ch) != str.end();
}
|
|
|
|
// Packs two bytes into one 16-bit value: high becomes the upper eight bits
// and low the lower eight bits. Only the low eight bits of each argument are
// used, so negative char values do not leak sign bits into the result.
inline uint16_t TwocharToUint16(char high, char low) {
  const uint16_t upper = static_cast<uint16_t>(uint16_t(high) & 0x00ff);
  const uint16_t lower = static_cast<uint16_t>(uint16_t(low) & 0x00ff);
  return static_cast<uint16_t>((upper << 8) | lower);
}
|
|
|
|
// Decodes the first len bytes of the UTF-8 text at str into 16-bit code
// units appended to vec (vec is cleared first).
//
// Only 1-, 2- and 3-byte sequences are supported, because the result must
// fit in 16 bits. Returns false, leaving vec holding whatever was decoded so
// far, when:
//   - str is null (vec is left untouched in this one case),
//   - a byte that cannot start a 1-3 byte sequence is met (a stray
//     continuation byte 0x80-0xBF, or a lead byte >= 0xF0),
//   - a sequence is cut off by the end of the input,
//   - a trailing byte is not of the form 10xxxxxx.
// Overlong encodings and surrogate values are not rejected.
template <class Uint16Container>
bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
  if (!str) {
    return false;
  }
  vec.clear();
  for (size_t i = 0; i < len;) {
    const uint8_t lead = static_cast<uint8_t>(str[i]);
    if (lead < 0x80) {  // 0xxxxxxx
      vec.push_back(lead);
      i += 1;
    } else if (lead >= 0xc0 && lead <= 0xdf && len - i >= 2) {  // 110xxxxx
      const uint8_t b1 = static_cast<uint8_t>(str[i + 1]);
      if ((b1 & 0xc0) != 0x80) {
        return false;
      }
      vec.push_back(static_cast<uint16_t>(((lead & 0x1f) << 6) | (b1 & 0x3f)));
      i += 2;
    } else if (lead >= 0xe0 && lead <= 0xef && len - i >= 3) {  // 1110xxxx
      const uint8_t b1 = static_cast<uint8_t>(str[i + 1]);
      const uint8_t b2 = static_cast<uint8_t>(str[i + 2]);
      if ((b1 & 0xc0) != 0x80 || (b2 & 0xc0) != 0x80) {
        return false;
      }
      vec.push_back(static_cast<uint16_t>(
          ((lead & 0x0f) << 12) | ((b1 & 0x3f) << 6) | (b2 & 0x3f)));
      i += 3;
    } else {
      // Stray continuation byte, 4-byte lead, or truncated sequence.
      return false;
    }
  }
  return true;
}
|
|
|
|
// std::string convenience overload: forwards the string's c_str() and size()
// to the pointer/length Utf8ToUnicode and returns its result.
template <class Uint16Container>
bool Utf8ToUnicode(const std::string& str, Uint16Container& vec) {
  const char* const bytes = str.c_str();
  const size_t count = str.size();
  return Utf8ToUnicode(bytes, count, vec);
}
|
|
|
|
// Decodes the first size bytes of the UTF-8 text at str into 32-bit code
// points appended to vec (vec is always cleared first).
//
// 1- to 4-byte sequences are supported. Returns false, leaving vec holding
// whatever was decoded so far, when:
//   - str is null while size is non-zero,
//   - a byte that cannot start a sequence is met (a stray continuation byte
//     0x80-0xBF, or a byte >= 0xF8),
//   - a sequence is cut off by the end of the input,
//   - a trailing byte is not of the form 10xxxxxx.
// Overlong encodings, surrogates and values above U+10FFFF are not rejected.
template <class Uint32Container>
bool Utf8ToUnicode32(const char * str, size_t size, Uint32Container& vec) {
  vec.clear();
  if (!str) {
    return size == 0;
  }
  for (size_t i = 0; i < size;) {
    const uint8_t lead = static_cast<uint8_t>(str[i]);
    size_t trailing;  // number of 10xxxxxx bytes that must follow the lead
    uint32_t tmp;     // payload bits collected so far
    if (lead < 0x80) {  // 0xxxxxxx: 7 bits
      trailing = 0;
      tmp = lead;
    } else if (lead >= 0xc0 && lead <= 0xdf) {  // 110xxxxx: 5 + 6 bits
      trailing = 1;
      tmp = lead & 0x1f;
    } else if (lead >= 0xe0 && lead <= 0xef) {  // 1110xxxx: 4 + 6 + 6 bits
      trailing = 2;
      tmp = lead & 0x0f;
    } else if (lead >= 0xf0 && lead <= 0xf7) {  // 11110xxx: 3 + 6 + 6 + 6 bits
      trailing = 3;
      tmp = lead & 0x07;
    } else {
      return false;  // stray continuation byte or invalid lead
    }
    if (size - i <= trailing) {
      return false;  // sequence runs past the end of the input
    }
    for (size_t k = 1; k <= trailing; ++k) {
      const uint8_t cont = static_cast<uint8_t>(str[i + k]);
      if ((cont & 0xc0) != 0x80) {
        return false;
      }
      tmp = (tmp << 6) | (cont & 0x3f);
    }
    vec.push_back(tmp);
    i += trailing + 1;
  }
  return true;
}
|
|
|
|
// std::string convenience overload: forwards the string's data() and size()
// to the pointer/length Utf8ToUnicode32 and returns its result.
template <class Uint32Container>
bool Utf8ToUnicode32(const std::string& str, Uint32Container& vec) {
  const char* const bytes = str.data();
  const size_t count = str.size();
  return Utf8ToUnicode32(bytes, count, vec);
}
|
|
|
|
// Returns how many bytes the code point ui takes when encoded as UTF-8:
// 1 up to 0x7f, 2 up to 0x7ff, 3 up to 0xffff, and 4 for anything larger.
inline int UnicodeToUtf8Bytes(uint32_t ui){
  if (ui > 0xffff) {
    return 4;
  }
  if (ui > 0x7ff) {
    return 3;
  }
  return ui > 0x7f ? 2 : 1;
}
|
|
|
|
// Encodes the 32-bit code points in [begin, end) as UTF-8 into res (res is
// cleared first).
//
// Values up to 0x7f take one byte, up to 0x7ff two, up to 0xffff three, and
// everything larger four. Values are not validated: only the low 21 bits of
// a value above 0xffff end up in the output.
template <class Uint32ContainerConIter>
void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, std::string& res) {
  res.clear();
  for (; begin != end; ++begin) {
    const uint32_t ui = *begin;
    if (ui <= 0x7f) {
      res += char(ui);
    } else if (ui <= 0x7ff) {
      res += char(((ui >> 6) & 0x1f) | 0xc0);
      res += char((ui & 0x3f) | 0x80);
    } else if (ui <= 0xffff) {
      res += char(((ui >> 12) & 0x0f) | 0xe0);
      res += char(((ui >> 6) & 0x3f) | 0x80);
      res += char((ui & 0x3f) | 0x80);
    } else {
      // The 11110xxx lead byte carries three payload bits (bits 20..18).
      // Masking with 0x03 dropped bit 20 and corrupted U+100000..U+10FFFF.
      res += char(((ui >> 18) & 0x07) | 0xf0);
      res += char(((ui >> 12) & 0x3f) | 0x80);
      res += char(((ui >> 6) & 0x3f) | 0x80);
      res += char((ui & 0x3f) | 0x80);
    }
  }
}
|
|
|
|
// Encodes the 16-bit code units in [begin, end) as UTF-8 into res (res is
// cleared first). Values up to 0x7f take one byte, up to 0x7ff two bytes,
// and all remaining values three bytes.
template <class Uint16ContainerConIter>
void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, std::string& res) {
  res.clear();
  for (; begin != end; begin++) {
    const uint16_t unit = *begin;
    if (unit <= 0x7f) {
      res.push_back(char(unit));
      continue;
    }
    if (unit <= 0x7ff) {
      // 110xxxxx 10xxxxxx
      res.push_back(char(((unit >> 6) & 0x1f) | 0xc0));
    } else {
      // 1110xxxx 10xxxxxx 10xxxxxx
      res.push_back(char(((unit >> 12) & 0x0f) | 0xe0));
      res.push_back(char(((unit >> 6) & 0x3f) | 0x80));
    }
    // Both multi-byte forms finish with the low six bits.
    res.push_back(char((unit & 0x3f) | 0x80));
  }
}
|
|
|
|
|
|
// Converts len bytes at str into 16-bit units appended to vec (vec is
// cleared first). A byte with the top bit clear becomes one unit on its own;
// a byte with the top bit set is combined with the following byte as
// (first << 8) | second, whatever that following byte is.
//
// Returns true on success and also when str is null (vec is then empty).
// Returns false when a top-bit-set byte is the last byte of the input; vec
// keeps the units produced before that point.
template <class Uint16Container>
bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) {
  vec.clear();
  if (!str) {
    return true;
  }
  for (size_t pos = 0; pos < len;) {
    const char cur = str[pos];
    if ((cur & 0x80) == 0) {
      vec.push_back(uint16_t(cur));
      ++pos;
      continue;
    }
    if (pos + 1 >= len) {
      // Lead byte without a partner.
      return false;
    }
    const uint16_t upper = uint16_t(cur) & 0x00ff;
    const uint16_t lower = uint16_t(str[pos + 1]) & 0x00ff;
    const uint16_t unit = (upper << 8) | lower;
    vec.push_back(unit);
    pos += 2;
  }
  return true;
}
|
|
|
|
// std::string convenience overload: forwards the string's c_str() and size()
// to the pointer/length GBKTrans and returns its result.
template <class Uint16Container>
bool GBKTrans(const std::string& str, Uint16Container& vec) {
  const char* const bytes = str.c_str();
  const size_t count = str.size();
  return GBKTrans(bytes, count, vec);
}
|
|
|
|
// Converts the 16-bit units in [begin, end) back into bytes in res (res is
// cleared first). For each unit the high byte is emitted only when its top
// bit is set; the low byte is always emitted.
template <class Uint16ContainerConIter>
void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, std::string& res) {
  res.clear();
  for (; begin != end; begin++) {
    const char upper = ((*begin) >> 8) & 0x00ff;
    const char lower = (*begin) & 0x00ff;
    if (upper & 0x80) {
      // Two-byte unit: the lead byte goes first.
      res += upper;
    }
    res += lower;
  }
}
|
|
|
|
/*
|
|
* format example: "%Y-%m-%d %H:%M:%S"
|
|
*/
|
|
// inline void GetTime(const string& format, string& timeStr) {
|
|
// time_t timeNow;
|
|
// time(&timeNow);
|
|
// timeStr.resize(64);
|
|
// size_t len = strftime((char*)timeStr.c_str(), timeStr.size(), format.c_str(), localtime(&timeNow));
|
|
// timeStr.resize(len);
|
|
// }
|
|
|
|
inline string PathJoin(const string& path1, const string& path2) {
|
|
if(EndsWith(path1, "/")) {
|
|
return path1 + path2;
|
|
}
|
|
return path1 + "/" + path2;
|
|
}
|
|
|
|
}
|
|
#endif
|