1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
| #include <iostream> #include <string> #include <Windows.h> using namespace std;
string wstring2string(wstring wstr,UINT nCode) { string result; int len = WideCharToMultiByte(nCode, 0, wstr.c_str(), wstr.size(), NULL, 0, NULL, NULL); char* buffer = new char[len + 1]; WideCharToMultiByte(nCode, 0, wstr.c_str(), wstr.size(), buffer, len, NULL, NULL); buffer[len] = '\0'; result.append(buffer); delete[] buffer; return result; }
/// Text encodings that DetectEncode can report.
enum Encode {
    ANSI = 1,   ///< No BOM and not recognised as multi-byte UTF-8 (includes pure 7-bit ASCII).
    UTF16_LE,   ///< Buffer starts with the byte-order mark FF FE.
    UTF16_BE,   ///< Buffer starts with the byte-order mark FE FF.
    UTF8_BOM,   ///< Buffer starts with the byte-order mark EF BB BF.
    UTF8        ///< No BOM, but the bytes form UTF-8 with at least one multi-byte sequence.
};

/// Heuristically decides whether a BOM-less buffer is UTF-8.
///
/// Every byte >= 0x80 must belong to a sequence consisting of a valid lead
/// byte (C2..DF, E0..EF, F0..F4) followed by the right number of continuation
/// bytes (10xxxxxx). Lead bytes that can never occur in UTF-8 (C0, C1, F5..FF,
/// including the obsolete 5- and 6-byte forms) make the buffer non-UTF-8.
///
/// @param data  Pointer to the bytes to inspect; must be readable for `size` bytes.
/// @param size  Number of bytes in `data`.
/// @return Encode::UTF8 if the buffer is structurally valid UTF-8 and contains
///         at least one multi-byte sequence; Encode::ANSI otherwise (including
///         empty and pure-ASCII buffers).
Encode IsUtf8Data(const uint8_t* data, size_t size)
{
    bool sawMultiByte = false;  // stays false for pure 7-bit ASCII input
    int32_t pending = 0;        // continuation bytes still expected

    for (size_t i = 0; i < size; ++i) {
        const uint8_t ch = data[i];

        if (pending > 0) {
            // Inside a sequence: only 10xxxxxx bytes are acceptable.
            if ((ch & 0xC0) != 0x80)
                return Encode::ANSI;
            --pending;
            continue;
        }

        if (ch < 0x80)
            continue;  // plain ASCII

        sawMultiByte = true;
        if (ch >= 0xC2 && ch <= 0xDF)
            pending = 1;
        else if (ch >= 0xE0 && ch <= 0xEF)
            pending = 2;
        else if (ch >= 0xF0 && ch <= 0xF4)
            pending = 3;
        else
            return Encode::ANSI;  // stray continuation byte or impossible lead byte
    }

    // A sequence cut off at the end of the buffer is not valid UTF-8, and a
    // buffer without any multi-byte sequence is reported as ANSI.
    if (pending > 0 || !sawMultiByte)
        return Encode::ANSI;
    return Encode::UTF8;
}

/// Detects the text encoding of a byte buffer.
///
/// A leading byte-order mark wins; a buffer that consists of nothing but the
/// BOM is still recognised. Without a BOM the result comes from IsUtf8Data.
///
/// @param data  Pointer to the bytes to inspect; may be null only when there
///              is nothing to inspect.
/// @param size  Number of bytes in `data`.
/// @return The detected encoding; Encode::ANSI for a null or empty buffer.
Encode DetectEncode(const uint8_t* data, size_t size)
{
    if (data == nullptr || size == 0)
        return Encode::ANSI;

    if (size >= 2) {
        if (data[0] == 0xFF && data[1] == 0xFE)
            return Encode::UTF16_LE;
        if (data[0] == 0xFE && data[1] == 0xFF)
            return Encode::UTF16_BE;
    }
    if (size >= 3 && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF)
        return Encode::UTF8_BOM;

    return IsUtf8Data(data, size);
}
|