当字符串中包含中文时,字符数的统计会出问题:一个汉字在多字节编码中占用多个字节,按字节计数得到的结果并不是字符数。调试时还可能提示字符串中的字符无效,原因是字符串实际按 UTF-8 编码,而 IDE 按 ANSI 编码显示。解决办法是先将多字节字符串转换为宽字符串(即 ANSI/GBK 转 Unicode/UTF-16),再逐个宽字符进行统计。

源码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
// CharNums.h
#include <Windows.h>
#include <tchar.h>
#include <vector>
#include <string>

#pragma warning(disable:4996)
using namespace std;

// Wide string -> multibyte string.
//
// Converts `ws` with wcstombs() while the C locale is temporarily switched to
// the environment's default locale (setlocale(LC_ALL, "")); the locale that
// was active on entry is put back before returning.
//
// Conversion stops at the first L'\0' in `ws`, because wcstombs() treats its
// input as a NUL-terminated string.
//
// Returns the converted text, or an empty string when `ws` is empty or holds
// a character that cannot be represented in the target locale.
std::string WStringToString(const std::wstring& ws)
{
    // setlocale(category, NULL) only queries the current locale. Copy the name
    // right away: the returned buffer may be overwritten by the next
    // setlocale() call.
    const char* activeLocale = setlocale(LC_ALL, NULL);
    const std::string savedLocale = activeLocale ? activeLocale : "";
    setlocale(LC_ALL, "");

    std::string result;
    const wchar_t* src = ws.c_str();

    // First pass: ask how many bytes are needed (terminator not included).
    // wcstombs() reports an unconvertible character as (size_t)-1.
    const size_t failed = static_cast<size_t>(-1);
    const size_t needed = wcstombs(NULL, src, 0);
    if (needed != failed && needed > 0)
    {
        result.resize(needed);
        // Second pass: convert straight into the string's own storage.
        const size_t written = wcstombs(&result[0], src, needed);
        if (written == failed)
        {
            result.clear();
        }
        else
        {
            result.resize(written);
        }
    }

    // Restore the caller's locale (skipped only if it could not be queried).
    if (!savedLocale.empty())
    {
        setlocale(LC_ALL, savedLocale.c_str());
    }
    return result;
}

// Multibyte string -> wide string.
//
// Interprets `str` in the system ANSI code page (CP_ACP) and converts it with
// MultiByteToWideChar().
//
// The result ends at the first embedded NUL character, if there is one.
// Returns an empty string when `str` is empty or the conversion fails.
// Note: the API takes an int length, so inputs larger than INT_MAX bytes are
// not supported.
std::wstring StringToWString(const std::string &str)
{
    if (str.empty())
    {
        return std::wstring();
    }

    const int srcLen = static_cast<int>(str.size());

    // First call: query the number of wide characters required.
    const int wideLen = MultiByteToWideChar(CP_ACP, 0, str.c_str(), srcLen, NULL, 0);
    if (wideLen <= 0)
    {
        return std::wstring();
    }

    // Second call: convert directly into the wstring's own storage, so there
    // is no manually managed buffer that could leak.
    std::wstring wide(static_cast<size_t>(wideLen), L'\0');
    const int written = MultiByteToWideChar(CP_ACP, 0, str.c_str(), srcLen, &wide[0], wideLen);
    if (written <= 0)
    {
        return std::wstring();
    }
    wide.resize(static_cast<size_t>(written));

    // Cut at the first embedded NUL so the result is a plain C-style string.
    const std::wstring::size_type nulPos = wide.find(L'\0');
    if (nulPos != std::wstring::npos)
    {
        wide.erase(nulPos);
    }

    return wide;
}

// Splits a multibyte string into a vector with one character per element
// (a Chinese character counts as a single character).
//
// `context` is converted to a wide string with StringToWString(), and each
// character is converted back on its own with WStringToString().
//
// A UTF-16 surrogate pair (high surrogate 0xD800-0xDBFF followed by a low
// surrogate 0xDC00-0xDFFF) is kept together as one character. Converting each
// half separately would hand WStringToString() a lone surrogate.
std::vector<std::string> SpiteStringCharacter(std::string context)
{
    std::vector<std::string> res;

    const std::wstring wContext = StringToWString(context);
    const std::wstring::size_type total = wContext.length();
    res.reserve(total);

    std::wstring::size_type pos = 0;
    while (pos < total)
    {
        // Number of wchar_t units that make up the character at `pos`.
        std::wstring::size_type unitCount = 1;

        const unsigned long lead = static_cast<unsigned long>(wContext[pos]);
        if (lead >= 0xD800UL && lead <= 0xDBFFUL && pos + 1 < total)
        {
            const unsigned long trail = static_cast<unsigned long>(wContext[pos + 1]);
            if (trail >= 0xDC00UL && trail <= 0xDFFFUL)
            {
                unitCount = 2;
            }
        }

        res.push_back(WStringToString(wContext.substr(pos, unitCount)));
        pos += unitCount;
    }

    return res;
}

// Tells whether `value` is exactly one Chinese character.
//
// Returns true only when `value` holds a single wide character whose code
// lies in U+4E00..U+9FFF (the basic CJK Unified Ideographs block). Empty
// strings, longer strings and characters outside that range return false.
bool IsChineseChar(std::wstring value)
{
    if (value.size() != 1)
    {
        return false;
    }

    // Compare the code value itself instead of peeking at the bytes of the
    // wchar_t: the result must not depend on byte order or on whether wchar_t
    // is 16 or 32 bits wide.
    const unsigned long code = static_cast<unsigned long>(value[0]);
    return code >= 0x4E00UL && code <= 0x9FFFUL;
}

// Counts the Chinese characters in a multibyte string.
//
// `context` is widened with StringToWString(); every wide character is then
// passed, as a one-character string, to IsChineseChar(), and the number of
// characters it accepts is returned.
int GetStringChineseCharCount(std::string context)
{
    const std::wstring wide = StringToWString(context);

    int total = 0;
    for (std::wstring::const_iterator it = wide.begin(); it != wide.end(); ++it)
    {
        const std::wstring single(1, *it);
        if (IsChineseChar(single))
        {
            total += 1;
        }
    }

    return total;
}

示例

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#include <Windows.h>
#include <tchar.h>
#include <vector>
#include <string>
#include <iostream>
// Demo: splits a mixed Chinese/ASCII string into characters and prints the
// total number of characters together with the number of Chinese characters.
int main()
{
    std::string value = "类型A123";
    std::vector<std::string> spliteList = SpiteStringCharacter(value);
    int count = GetStringChineseCharCount(value);
    std::cout<<"一共有 "<< spliteList.size()<<" 个字符,其中有 "<<
    count <<" 个汉字."<<std::endl;

    return 0;
}

其他

另一种统计中英文字符的方法,可以参考《C-计算中英文字符个数》。