Samchon Framework for CPP  1.0.0
Charset.hpp
1 #pragma once
#include <samchon/API.hpp>

#include <codecvt>
#include <locale>
#include <stdexcept>
#include <string>
#include <type_traits>

#include <samchon/WeakString.hpp>

#ifdef _WIN32
#include <atlstr.h>
#endif
11 
12 namespace samchon
13 {
14 namespace library
15 {
/**
 * @brief A utility class supporting conversion between multiple character-sets.
 *
 * @details
 * All members are static. Three encodings are involved:
 *  - "multibyte": the narrow encoding of the environment (CP_ACP on Windows,
 *    the user's default locale, i.e. std::locale(""), elsewhere),
 *  - UTF-8,
 *  - "unicode": std::wstring (UTF-16 where wchar_t is 16 bits wide, otherwise
 *    one code point per wchar_t).
 *
 * Returned strings never contain an extra terminating NUL character.
 */
class Charset
{
public:
	/**
	 * @brief Identifiers of the narrow encodings accepted by toUnicode().
	 */
	enum : int
	{
		MULTIBYTE = 4,
		UTF8 = 8
	};

	/**
	 * @brief Convert UTF-8 to multibyte.
	 *
	 * @param source A UTF-8 encoded string.
	 * @return The same text in the multibyte encoding.
	 */
	static auto toMultibyte(const std::string &source) -> std::string
	{
		return toMultibyte(toUnicode(source, UTF8));
	}

	/**
	 * @brief Convert unicode to multibyte.
	 *
	 * @param source A wide (unicode) string.
	 * @return The same text in the multibyte encoding.
	 * @throw std::runtime_error (non-Windows only) when the locale's codecvt
	 *        facet reports a conversion error.
	 */
	static auto toMultibyte(const std::wstring &source) -> std::string
	{
#ifdef _WIN32
		return wideToBytes(CP_ACP, source);
#else
		typedef std::codecvt<wchar_t, char, std::mbstate_t> codecvt_t;

		// Nothing to convert; also avoids taking the address of an empty buffer.
		if (source.empty())
			return std::string();

		const std::locale loc("");
		const codecvt_t &facet = std::use_facet<codecvt_t>(loc);
		std::mbstate_t state = std::mbstate_t();

		// Worst case: every wide character expands to max_length() bytes.
		std::string buf(source.size() * static_cast<std::string::size_type>(facet.max_length()), '\0');
		const wchar_t *in_next = nullptr;
		char *out_next = &buf[0];

		const std::codecvt_base::result r =
			facet.out
			(
				state,
				source.data(), source.data() + source.size(), in_next,
				&buf[0], &buf[0] + buf.size(), out_next
			);

		if (r == std::codecvt_base::error)
			throw std::runtime_error("can't convert wstring to string");

		// Keep only the bytes that were actually written, not the whole scratch buffer.
		buf.resize(static_cast<std::string::size_type>(out_next - &buf[0]));
		return buf;
#endif
	}

	/**
	 * @brief Convert multibyte to UTF-8.
	 *
	 * @param source A string in the multibyte encoding.
	 * @return The same text encoded as UTF-8.
	 */
	static auto toUTF8(const std::string &source) -> std::string
	{
		std::string dest = toUTF8(toUnicode(source, MULTIBYTE));

		// Historical behaviour: a single trailing NUL is stripped from the result.
		// The emptiness check avoids calling back() on an empty string.
		if (!dest.empty() && dest.back() == '\0')
			dest.pop_back();

		return dest;
	}

	/**
	 * @brief Convert unicode to UTF-8.
	 *
	 * @param source A wide (unicode) string.
	 * @return The same text encoded as UTF-8.
	 * @throw std::range_error (non-Windows only) when std::wstring_convert
	 *        cannot convert the input.
	 */
	static auto toUTF8(const std::wstring &source) -> std::string
	{
#ifdef _WIN32
		return wideToBytes(CP_UTF8, source);
#else
		std::wstring_convert<utf8_codecvt_t> utf8Converter;
		return utf8Converter.to_bytes(source);
#endif
	}

	/**
	 * @brief Convert multibyte or UTF-8 to unicode.
	 *
	 * @param source  The narrow string to convert.
	 * @param charset Encoding of @p source: Charset::MULTIBYTE or Charset::UTF8.
	 * @return The wide string; empty when @p charset is neither of the two
	 *         supported values.
	 * @throw std::runtime_error (non-Windows, MULTIBYTE) when the locale's
	 *        codecvt facet reports a conversion error.
	 * @throw std::range_error (non-Windows, UTF8) when std::wstring_convert
	 *        cannot convert the input.
	 */
	static auto toUnicode(const std::string &source, int charset) -> std::wstring
	{
#ifdef _WIN32
		if (charset == MULTIBYTE)
			return bytesToWide(CP_ACP, source);
		else if (charset == UTF8)
			return bytesToWide(CP_UTF8, source);
		else
			return std::wstring();
#else
		if (charset == MULTIBYTE)
		{
			typedef std::codecvt<wchar_t, char, std::mbstate_t> codecvt_t;

			// Nothing to convert; also avoids taking the address of an empty buffer.
			if (source.empty())
				return std::wstring();

			const std::locale loc("");
			const codecvt_t &facet = std::use_facet<codecvt_t>(loc);
			std::mbstate_t state = std::mbstate_t();

			// A multibyte character occupies at least one byte, so the input
			// length is an upper bound for the number of wide characters.
			std::wstring buf(source.size(), L'\0');
			const char *in_next = nullptr;
			wchar_t *out_next = &buf[0];

			const std::codecvt_base::result r =
				facet.in
				(
					state,
					source.data(), source.data() + source.size(), in_next,
					&buf[0], &buf[0] + buf.size(), out_next
				);

			if (r == std::codecvt_base::error)
				throw std::runtime_error("can't convert string to wstring");

			// Keep only the characters that were actually produced.
			buf.resize(static_cast<std::wstring::size_type>(out_next - &buf[0]));
			return buf;
		}
		else if (charset == UTF8)
		{
			std::wstring_convert<utf8_codecvt_t> utf8Converter;
			return utf8Converter.from_bytes(source);
		}
		else
			return std::wstring();
#endif
	}

private:
#ifdef _WIN32
	/**
	 * @brief Convert a wide string to the narrow encoding of a Windows code page.
	 *
	 * The explicit length is passed (rather than -1), so the result carries no
	 * terminating NUL. Returns an empty string when the API reports failure.
	 */
	static auto wideToBytes(unsigned int codePage, const std::wstring &source) -> std::string
	{
		if (source.empty())
			return std::string();

		const int srcLen = static_cast<int>(source.size());
		const int len = WideCharToMultiByte(codePage, 0, source.data(), srcLen, nullptr, 0, nullptr, nullptr);
		if (len <= 0)
			return std::string();

		std::string str(static_cast<std::string::size_type>(len), '\0');
		WideCharToMultiByte(codePage, 0, source.data(), srcLen, &str[0], len, nullptr, nullptr);

		return str;
	}

	/**
	 * @brief Convert a narrow string in a Windows code page to a wide string.
	 *
	 * Returns an empty string when the API reports failure.
	 */
	static auto bytesToWide(unsigned int codePage, const std::string &source) -> std::wstring
	{
		if (source.empty())
			return std::wstring();

		const int srcLen = static_cast<int>(source.size());
		const int len = MultiByteToWideChar(codePage, 0, source.data(), srcLen, nullptr, 0);
		if (len <= 0)
			return std::wstring();

		std::wstring wstr(static_cast<std::wstring::size_type>(len), L'\0');
		MultiByteToWideChar(codePage, 0, source.data(), srcLen, &wstr[0], len);

		return wstr;
	}
#else
	/**
	 * @brief UTF-8 conversion facet matching the width of wchar_t.
	 *
	 * UTF-16 code units when wchar_t is 16 bits wide; otherwise one code point
	 * per wchar_t, which is what the locale codecvt facet used for the
	 * multibyte conversions works with.
	 */
	typedef std::conditional
		<
			sizeof(wchar_t) == 2,
			std::codecvt_utf8_utf16<wchar_t>,
			std::codecvt_utf8<wchar_t>
		>::type utf8_codecvt_t;
#endif
};
227 };
228 };
static auto toMultibyte(const std::string &source) -> std::string
Convert a UTF-8 string to a multibyte string.
Definition: Charset.hpp:56
static auto toUTF8(const std::wstring &source) -> std::string
Convert a Unicode (wide) string to a UTF-8 string.
Definition: Charset.hpp:145
static auto toMultibyte(const std::wstring &source) -> std::string
Convert a Unicode (wide) string to a multibyte string.
Definition: Charset.hpp:76
static auto toUTF8(const std::string &source) -> std::string
Convert a multibyte string to a UTF-8 string.
Definition: Charset.hpp:122
static auto toUnicode(const std::string &source, int charset) -> std::wstring
Convert a multibyte or UTF-8 string to a Unicode (wide) string.
Definition: Charset.hpp:166
A utility class supporting conversion between multiple character-sets.
Definition: Charset.hpp:35