当字符串中包含中文时,字符数的统计会出现问题;调试时还会提示“字符串中的字符无效”。原因是字符串的编码为 UTF-8,而 IDE 按 ANSI 编码显示。解决办法是将多字节字符串转换为宽字符串(即 ANSI/GBK 多字节转 Unicode 宽字符),再按宽字符逐个处理。
源码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
| #include <Windows.h> #include <tchar.h> #include <vector> #include <string>
#pragma warning(disable:4996) using namespace std;
/// Converts a wide string to a multibyte string using the user's default
/// locale (the one selected by setlocale(LC_ALL, "")).
///
/// The C locale is process-wide; it is switched for the duration of the call
/// and the locale that was active on entry is restored before returning.
/// Conversion stops at the first embedded L'\0', because wcstombs treats its
/// input as a NUL-terminated string.
///
/// @param ws  Wide string to convert.
/// @return    The multibyte form of @p ws, or an empty string when @p ws is
///            empty or contains a character wcstombs cannot convert.
std::string WStringToString(const std::wstring& ws)
{
    // setlocale(LC_ALL, NULL) only queries the current locale. Copy the result:
    // the returned buffer may be overwritten by the next setlocale call.
    const char* current = setlocale(LC_ALL, NULL);
    const std::string previousLocale = (current != NULL) ? current : "";
    setlocale(LC_ALL, "");

    std::string result;
    const wchar_t* src = ws.c_str();

    // First pass: measure. wcstombs reports an unconvertible character as
    // (size_t)-1, which must not be used as a buffer size.
    const size_t needed = wcstombs(NULL, src, 0);
    if (needed != static_cast<size_t>(-1) && needed > 0) {
        // One extra byte so wcstombs has room for its terminating NUL.
        result.resize(needed + 1);
        const size_t written = wcstombs(&result[0], src, result.size());
        result.resize(written == static_cast<size_t>(-1) ? 0 : written);
    }

    if (!previousLocale.empty()) {
        setlocale(LC_ALL, previousLocale.c_str());
    }
    return result;
}
/// Converts a multibyte string in the system ANSI code page (CP_ACP) to a
/// wide string.
///
/// The whole of @p str is converted, including any bytes that follow an
/// embedded '\0'.
///
/// @param str  Bytes encoded in the active ANSI code page.
/// @return     The wide-character form of @p str; empty when @p str is empty
///             or MultiByteToWideChar reports a failure.
std::wstring StringToWString(const std::string &str)
{
    std::wstring wContext;
    if (str.empty()) {
        // MultiByteToWideChar fails for a zero source length, so answer directly.
        return wContext;
    }

    // NOTE(review): the API takes the byte count as an int, so inputs longer
    // than INT_MAX bytes are not supported.
    const int srcLen = static_cast<int>(str.size());

    // First call: ask how many wide characters the converted text needs.
    const int len = MultiByteToWideChar(CP_ACP, 0, str.c_str(), srcLen, NULL, 0);
    if (len <= 0) {
        return wContext;
    }

    // Second call: convert straight into the result's own storage, so no
    // manually managed buffer can leak if an allocation throws.
    wContext.resize(static_cast<size_t>(len));
    const int written = MultiByteToWideChar(CP_ACP, 0, str.c_str(), srcLen, &wContext[0], len);
    wContext.resize(written > 0 ? static_cast<size_t>(written) : 0);
    return wContext;
}
/// Splits a multibyte string into its individual characters.
///
/// The text is widened with StringToWString so that a character occupying
/// several bytes (e.g. a Chinese character) is handled as one unit; each unit
/// is then converted back to a multibyte string with WStringToString.
///
/// @param context  Text to split, in the encoding StringToWString expects.
/// @return         One multibyte string per character, in original order.
std::vector<std::string> SpiteStringCharacter(std::string context)
{
    std::vector<std::string> res;

    const std::wstring wContext = StringToWString(context);
    res.reserve(wContext.length());

    std::wstring::size_type i = 0;
    while (i < wContext.length()) {
        // A UTF-16 high surrogate followed by a low surrogate encodes ONE
        // character; keep the two code units together instead of emitting two
        // halves that cannot be converted on their own.
        std::wstring::size_type unitCount = 1;
        const wchar_t lead = wContext[i];
        if (lead >= 0xD800 && lead <= 0xDBFF && i + 1 < wContext.length()) {
            const wchar_t trail = wContext[i + 1];
            if (trail >= 0xDC00 && trail <= 0xDFFF) {
                unitCount = 2;
            }
        }

        res.push_back(WStringToString(wContext.substr(i, unitCount)));
        i += unitCount;
    }

    return res;
}
/// Tells whether a wide string consists of exactly one character in the
/// range U+4E00..U+9FFF (the CJK Unified Ideographs block).
///
/// @param value  Wide string to test.
/// @return       true only when @p value has length 1 and that character's
///               code lies in [0x4E00, 0x9FFF]; false otherwise.
bool IsChineseChar(std::wstring value)
{
    if (value.size() != 1) {
        return false;
    }

    // Compare the character's numeric value rather than peeking at its raw
    // bytes, so the answer does not depend on byte order or on how wide
    // wchar_t is on the target platform.
    const unsigned long code = static_cast<unsigned long>(value[0]);
    return code >= 0x4E00UL && code <= 0x9FFFUL;
}
/// Counts the characters of a multibyte string that IsChineseChar accepts.
///
/// @param context  Text to inspect; it is widened with StringToWString first.
/// @return         Number of wide characters for which IsChineseChar is true.
int GetStringChineseCharCount(std::string context)
{
    const std::wstring wide = StringToWString(context);

    int total = 0;
    for (std::wstring::const_iterator it = wide.begin(); it != wide.end(); ++it) {
        // IsChineseChar takes a string, so wrap the single character in one.
        const std::wstring single(1, *it);
        if (IsChineseChar(single)) {
            total += 1;
        }
    }

    return total;
}
|
示例
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
| #include <Windows.h> #include <tchar.h> #include <vector> #include <string> #include <iostream> int main() { std::string value = "类型A123"; std::vector<std::string> spliteList = SpiteStringCharacter(value); int count = GetStringChineseCharCount(value); std::cout<<"一共有 "<< spliteList.size()<<" 个字符,其中有 "<< count <<" 个汉字."<<std::endl;
return 0;
|
其他
另一种统计中英文字符的方法,可以参考《C-计算中英文字符个数》。