stdex
Additional custom algorithms, or algorithms not covered by the C++ Standard Library
Loading...
Searching...
No Matches
unicode.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023-2024 Amebis
4*/
5
6#pragma once
7
8#include "assert.hpp"
9#include "compat.hpp"
10#include "endian.hpp"
11#include "math.hpp"
12#include "string.hpp"
13#include <stdint.h>
14#ifndef _WIN32
15#include <iconv.h>
16#include <langinfo.h>
17#endif
18#include <map>
19#include <memory>
20#include <string>
21
22#if defined(__GNUC__)
23#pragma GCC diagnostic push
24#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
25#pragma GCC diagnostic ignored "-Wexit-time-destructors"
26#endif
27
28namespace stdex
29{
///
/// Character set identifier
///
/// On Windows the enumerators hold code page numbers. Elsewhere they are
/// consecutive indices starting at zero, closed by the `_max` sentinel.
///
enum class charset_id : uint16_t {
#ifdef _WIN32
	system = CP_ACP,
	oem = CP_OEMCP,
	utf7 = CP_UTF7,
	utf8 = CP_UTF8,
	utf16 = 1200, // CP_WINUNICODE
	utf32 = 12000,
	windows1250 = 1250,
	windows1251 = 1251,
	windows1252 = 1252,
#else
	system = 0,
	utf7 = 1,
	utf8 = 2,
	utf16 = 3,
	utf32 = 4,
	windows1250 = 5,
	windows1251 = 6,
	windows1252 = 7,

	_max = 8 // Number of identifiers above; not a charset itself
#endif
};
54
55#ifdef _WIN32
56 constexpr charset_id wchar_t_charset = charset_id::utf16;
57#ifdef _UNICODE
58 constexpr charset_id system_charset = charset_id::utf16;
59#else
60 constexpr charset_id system_charset = charset_id::system;
61#endif
62#else
63 constexpr charset_id wchar_t_charset = charset_id::utf32;
64 constexpr charset_id system_charset = charset_id::system;
65#endif
66
74 inline charset_id charset_from_name(_In_z_ const char* name)
75 {
76 struct charset_less {
77 bool operator()(_In_z_ const char* a, _In_z_ const char* b) const
78 {
79 return stricmp(a, b) < 0;
80 }
81 };
82 static const std::map<const char*, charset_id, charset_less> charsets = {
83 { "UNICODE-1-1-UTF-7", charset_id::utf7 },
84 { "UTF-7", charset_id::utf7 },
85 { "CSUNICODE11UTF7", charset_id::utf7 },
86
87 { "UTF-8", charset_id::utf8 },
88 { "UTF8", charset_id::utf8 },
89
90 { "UTF-16", charset_id::utf16 },
91#if BYTE_ORDER == BIG_ENDIAN
92 { "UTF-16BE", charset_id::utf16 },
93#else
94 { "UTF-16LE", charset_id::utf16 },
95#endif
96
97 { "UTF-32", charset_id::utf32 },
98#if BYTE_ORDER == BIG_ENDIAN
99 { "UTF-32BE", charset_id::utf32 },
100#else
101 { "UTF-32LE", charset_id::utf32 },
102#endif
103
104 { "CP1250", charset_id::windows1250 },
105 { "MS-EE", charset_id::windows1250 },
106 { "WINDOWS-1250", charset_id::windows1250 },
107
108 { "CP1251", charset_id::windows1251 },
109 { "MS-CYRL", charset_id::windows1251 },
110 { "WINDOWS-1251", charset_id::windows1251 },
111
112 { "CP1252", charset_id::windows1252 },
113 { "MS-ANSI", charset_id::windows1252 },
114 { "WINDOWS-1252", charset_id::windows1252 },
115 };
116 if (auto el = charsets.find(name); el != charsets.end())
117 return el->second;
118 return charset_id::system;
119 }
120
128 template <class TR = std::char_traits<char>, class AX = std::allocator<char>>
129 charset_id charset_from_name(_In_ const std::basic_string<char, TR, AX>& name)
130 {
131 return charset_from_name(name.c_str());
132 }
133
137 template <typename T_from, typename T_to>
139 {
140 protected:
141 charset_id m_from, m_to;
142
143 public:
144 charset_encoder(_In_ charset_id from, _In_ charset_id to) :
145 m_from(from),
146 m_to(to)
147 {
148#ifdef _WIN32
149 m_from_wincp = to_encoding(from);
150 m_to_wincp = to_encoding(to);
151#else
152 m_handle = iconv_open(to_encoding(to), to_encoding(from));
153 if (m_handle == (iconv_t)-1)
154 throw std::system_error(errno, std::system_category(), "iconv_open failed");
155#endif
156 }
157
158#ifndef _WIN32
160 {
161 iconv_close(m_handle);
162 }
163#endif
164
165 charset_id from_encoding() const { return m_from; }
166 charset_id to_encoding() const { return m_to; }
167
175 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
176 void strcat(
177 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
178 _In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
179 {
180 stdex_assert(src || !count_src);
181 count_src = strnlen<T_from>(src, count_src);
182 if (!count_src) _Unlikely_
183 return;
184
185#ifdef _WIN32
186 constexpr DWORD dwFlagsWCMB = 0;
187 constexpr LPCCH lpDefaultChar = NULL;
188
189 stdex_assert(src);
190 if (m_from_wincp == m_to_wincp) _Unlikely_{
191 dst.append(reinterpret_cast<const T_to*>(src), count_src);
192 return;
193 }
194
195#pragma warning(suppress: 4127)
196 if constexpr (sizeof(T_from) == sizeof(char) && sizeof(T_to) == sizeof(wchar_t)) {
197 stdex_assert(count_src < INT_MAX || count_src == SIZE_MAX);
198
199 // Try to convert to stack buffer first.
200 DWORD dwFlagsMBWC = static_cast<UINT>(m_from_wincp) < CP_UTF7 ? MB_PRECOMPOSED : 0;
201 WCHAR szStackBuffer[1024 / sizeof(WCHAR)];
202#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong?
203 int cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer));
204 if (cch) {
205 // Append from stack.
206 dst.append(reinterpret_cast<const T_to*>(szStackBuffer), count_src != SIZE_MAX ? wcsnlen(szStackBuffer, cch) : static_cast<size_t>(cch) - 1);
207 return;
208 }
209 DWORD dwResult = GetLastError();
210 if (dwResult == ERROR_INSUFFICIENT_BUFFER) {
211 // Query the required output size. Allocate buffer. Then convert again.
212 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), NULL, 0);
213 size_t offset = dst.size();
214 dst.resize(offset + static_cast<size_t>(cch));
215 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), &dst[offset], cch);
216 dst.resize(offset + (count_src != SIZE_MAX ? wcsnlen(&dst[offset], cch) : static_cast<size_t>(cch) - 1));
217 return;
218 }
219 throw std::system_error(dwResult, std::system_category(), "MultiByteToWideChar failed");
220 }
221
222#pragma warning(suppress: 4127)
223 if constexpr (sizeof(T_from) == sizeof(wchar_t) && sizeof(T_to) == sizeof(char)) {
224 stdex_assert(count_src < INT_MAX || count_src == SIZE_MAX);
225
226 // Try to convert to stack buffer first.
227 CHAR szStackBuffer[1024 / sizeof(CHAR)];
228#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpWideCharStr parameter wrong?
229 int cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer), lpDefaultChar, NULL);
230 if (cch) {
231 // Copy from stack. Be careful not to include zero terminator.
232 dst.append(reinterpret_cast<const T_to*>(szStackBuffer), count_src != SIZE_MAX ? strnlen(szStackBuffer, cch) : static_cast<size_t>(cch) - 1);
233 return;
234 }
235 DWORD dwResult = GetLastError();
236 if (dwResult == ERROR_INSUFFICIENT_BUFFER) {
237 // Query the required output size. Allocate buffer. Then convert again.
238 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), NULL, 0, lpDefaultChar, NULL);
239 size_t offset = dst.size();
240 dst.resize(offset + static_cast<size_t>(cch));
241 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), &dst[offset], cch, lpDefaultChar, NULL);
242 dst.resize(offset + (count_src != SIZE_MAX ? strnlen(&dst[offset], cch) : static_cast<size_t>(cch) - 1));
243 return;
244 }
245 throw std::system_error(dwResult, std::system_category(), "WideCharToMultiByte failed");
246 }
247
248#pragma warning(suppress: 4127)
249 if constexpr (sizeof(T_from) == sizeof(char) && sizeof(T_to) == sizeof(char)) {
250 stdex_assert(count_src < INT_MAX || count_src == SIZE_MAX);
251
252 // Try to convert to stack buffer first.
253 DWORD dwFlagsMBWC = static_cast<UINT>(m_from_wincp) < CP_UTF7 ? MB_PRECOMPOSED : 0, dwResult;
254 WCHAR szStackBufferMBWC[512 / sizeof(WCHAR)];
255#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong?
256 int cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szStackBufferMBWC, _countof(szStackBufferMBWC));
257 if (cch) {
258 // Append from stack.
259 size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szStackBufferMBWC, cch) : static_cast<size_t>(cch) - 1;
260 stdex_assert(count_inter < INT_MAX);
261
262 // Try to convert to stack buffer first.
263 CHAR szStackBufferWCMB[512 / sizeof(CHAR)];
264#pragma warning(suppress: 6387) // Testing indicates szStackBufferMBWC may be NULL when count_inter is also 0. Is SAL of the lpWideCharStr parameter wrong?
265 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), szStackBufferWCMB, _countof(szStackBufferWCMB), lpDefaultChar, NULL);
266 if (cch) {
267 // Copy from stack. Be careful not to include zero terminator.
268 dst.append(reinterpret_cast<const T_to*>(szStackBufferWCMB), strnlen(szStackBufferWCMB, cch));
269 return;
270 }
271 dwResult = GetLastError();
272 if (dwResult == ERROR_INSUFFICIENT_BUFFER) {
273 // Query the required output size. Allocate buffer. Then convert again.
274 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL);
275 size_t offset = dst.size();
276 dst.resize(offset + cch);
277 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), &dst[offset], cch, lpDefaultChar, NULL);
278 dst.resize(offset + strnlen(&dst[offset], cch));
279 return;
280 }
281 throw std::system_error(dwResult, std::system_category(), "WideCharToMultiByte failed");
282 }
283 dwResult = GetLastError();
284 if (dwResult == ERROR_INSUFFICIENT_BUFFER) {
285 // Query the required output size. Allocate buffer. Then convert again.
286 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), NULL, 0);
287 std::unique_ptr<WCHAR[]> szBufferMBWC(new WCHAR[cch]);
288 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szBufferMBWC.get(), cch);
289 size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szBufferMBWC.get(), cch) : static_cast<size_t>(cch) - 1;
290
291 // Query the required output size. Allocate buffer. Then convert again.
292 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szBufferMBWC.get(), static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL);
293 size_t offset = dst.size();
294 dst.resize(offset + cch);
295 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szBufferMBWC.get(), static_cast<int>(count_inter), &dst[offset], cch, lpDefaultChar, NULL);
296 dst.resize(offset + strnlen(&dst[offset], cch));
297 return;
298 }
299 throw std::system_error(dwResult, std::system_category(), "MultiByteToWideChar failed");
300 }
301#else
302 dst.reserve(dst.size() + count_src);
303 T_to buf[1024 / sizeof(T_to)];
304 size_t src_size = stdex::mul(sizeof(T_from), count_src);
305 for (;;) {
306 T_to* output = &buf[0];
307 size_t output_size = sizeof(buf);
308 errno = 0;
309 iconv(m_handle, const_cast<char**>(reinterpret_cast<const char**>(&src)), &src_size, reinterpret_cast<char**>(&output), &output_size);
310 dst.append(buf, reinterpret_cast<T_to*>(reinterpret_cast<char*>(buf) + sizeof(buf) - output_size));
311 if (!errno)
312 break;
313 if (errno == E2BIG)
314 continue;
315 throw std::system_error(errno, std::system_category(), "iconv failed");
316 }
317#endif
318 }
319
326 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
327 void strcat(
328 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
329 _In_z_ const T_from* src)
330 {
331 strcat(dst, src, SIZE_MAX);
332 }
333
340 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
341 void strcat(
342 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
343 _In_ const std::basic_string_view<T_from, std::char_traits<T_from>> src)
344 {
345 strcat(dst, src.data(), src.size());
346 }
347
355 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
356 void strcpy(
357 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
358 _In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
359 {
360 dst.clear();
361 strcat(dst, src, count_src);
362 }
363
370 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
371 void strcpy(
372 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
373 _In_z_ const T_from* src)
374 {
375 strcpy(dst, src, SIZE_MAX);
376 }
377
384 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
385 void strcpy(
386 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
387 _In_ const std::basic_string_view<T_from, std::char_traits<T_from>> src)
388 {
389 strcpy(dst, src.data(), src.size());
390 }
391
398 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
399 std::basic_string<T_to, TR_to, AX_to> convert(_In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
400 {
401 std::basic_string<T_to, TR_to, AX_to> dst;
402 strcat(dst, src, count_src);
403 return dst;
404 }
405
411 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
412 std::basic_string<T_to, TR_to, AX_to> convert(_In_z_ const T_from* src)
413 {
414 return convert(src, SIZE_MAX);
415 }
416
422 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
423 std::basic_string<T_to, TR_to, AX_to> convert(_In_ const std::basic_string_view<T_from, std::char_traits<T_from>> src)
424 {
425 return convert(src.data(), src.size());
426 }
427
428 void clear()
429 {
430#ifndef _WIN32
431 iconv(m_handle, NULL, NULL, NULL, NULL);
432#endif
433 }
434
435 static charset_id system_charset()
436 {
437#ifdef _WIN32
438 return static_cast<charset_id>(GetACP());
439#else
440 return charset_from_name(nl_langinfo(CODESET));
441#endif
442 }
443
444#ifdef _WIN32
445 protected:
446 static UINT to_encoding(_In_ charset_id charset)
447 {
448 return
449 charset == charset_id::system ? GetACP() :
450 charset == charset_id::oem ? GetOEMCP() :
451 static_cast<UINT>(charset);
452 }
453
454 protected:
455 UINT m_from_wincp, m_to_wincp;
456#else
457 protected:
458 static const char* to_encoding(_In_ charset_id charset)
459 {
460 static const char* const encodings[static_cast<std::underlying_type_t<charset_id>>(charset_id::_max)] = {
461 "", // system
462 "UTF-7", // utf7
463 "UTF-8", // utf8
464#if BYTE_ORDER == BIG_ENDIAN
465 "UTF-16BE", // utf16
466 "UTF-32BE", // utf32
467#else
468 "UTF-16LE", // utf16
469 "UTF-32LE", // utf32
470#endif
471 "CP1250", // windows1250
472 "CP1251", // windows1251
473 "CP1252", // windows1252
474 };
475 return
476 charset == charset_id::system ? nl_langinfo(CODESET) :
477 encodings[static_cast<std::underlying_type_t<charset_id>>(charset)];
478 }
479
480 protected:
481 iconv_t m_handle;
482#endif
483 };
484
495 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
496#ifndef _WIN32
497 _Deprecated_("For better performance, consider a reusable charset_encoder")
498#endif
499 inline void strcat(
500 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
501 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
502 _In_ charset_id charset = charset_id::system)
503 {
504 charset_encoder<char, wchar_t>(charset, wchar_t_charset).strcat(dst, src, count_src);
505 }
506
507 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
508 _Deprecated_("Use stdex::strcat")
509 inline void str2wstr(
510 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
511 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
512 _In_ charset_id charset = charset_id::system)
513 {
514 strcat(dst, src, count_src, charset);
515 }
516
526 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
527#ifndef _WIN32
528 _Deprecated_("For better performance, consider a reusable charset_encoder")
529#endif
530 inline void strcat(
531 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
532 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
533 _In_ charset_id charset = charset_id::system)
534 {
535 strcat(dst, src.data(), src.size(), charset);
536 }
537
538 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
539 _Deprecated_("Use stdex::strcat")
540 inline void str2wstr(
541 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
542 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
543 _In_ charset_id charset = charset_id::system)
544 {
545 strcat(dst, src, charset);
546 }
547
558 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
559#ifndef _WIN32
560 _Deprecated_("For better performance, consider a reusable charset_encoder")
561#endif
562 inline void strcpy(
563 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
564 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
565 _In_ charset_id charset = charset_id::system)
566 {
567 dst.clear();
568 strcat(dst, src, count_src, charset);
569 }
570
580 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
581#ifndef _WIN32
582 _Deprecated_("For better performance, consider a reusable charset_encoder")
583#endif
584 inline void strcpy(
585 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
586 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
587 _In_ charset_id charset = charset_id::system)
588 {
589 strcpy(dst, src.data(), src.size(), charset);
590 }
591
602#ifndef _WIN32
603 _Deprecated_("For better performance, consider a reusable charset_encoder")
604#endif
605 inline std::wstring str2wstr(
606 _In_z_ const char* src,
607 _In_ charset_id charset = charset_id::system)
608 {
609 std::wstring dst;
610 strcat(dst, src, SIZE_MAX, charset);
611 return dst;
612 }
613
625#ifndef _WIN32
626 _Deprecated_("For better performance, consider a reusable charset_encoder")
627#endif
628 inline std::wstring str2wstr(
629 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
630 _In_ charset_id charset = charset_id::system)
631 {
632 std::wstring dst;
633 strcat(dst, src, count_src, charset);
634 return dst;
635 }
636
647#ifndef _WIN32
648 _Deprecated_("For better performance, consider a reusable charset_encoder")
649#endif
650 inline std::wstring str2wstr(
651 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
652 _In_ charset_id charset = charset_id::system)
653 {
654 return str2wstr(src.data(), src.size(), charset);
655 }
656
667 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
668#ifndef _WIN32
669 _Deprecated_("For better performance, consider a reusable charset_encoder")
670#endif
671 inline void strcat(
672 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
673 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
674 _In_ charset_id charset = charset_id::system)
675 {
676 charset_encoder<wchar_t, char>(wchar_t_charset, charset).strcat(dst, src, count_src);
677 }
678
679 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
680 _Deprecated_("Use stdex::strcat")
681 inline void wstr2str(
682 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
683 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
684 _In_ charset_id charset = charset_id::system)
685 {
686 strcat(dst, src, count_src, charset);
687 }
688
698 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
699#ifndef _WIN32
700 _Deprecated_("For better performance, consider a reusable charset_encoder")
701#endif
702 inline void strcat(
703 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
704 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
705 _In_ charset_id charset = charset_id::system)
706 {
707 strcat(dst, src.data(), src.size(), charset);
708 }
709
710 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
711 _Deprecated_("Use stdex::strcat")
712 inline void wstr2str(
713 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
714 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
715 _In_ charset_id charset = charset_id::system)
716 {
717 strcat(dst, src, charset);
718 }
719
730 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
731#ifndef _WIN32
732 _Deprecated_("For better performance, consider a reusable charset_encoder")
733#endif
734 inline void strcpy(
735 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
736 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
737 _In_ charset_id charset = charset_id::system)
738 {
739 dst.clear();
740 strcat(dst, src, count_src, charset);
741 }
742
752 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
753#ifndef _WIN32
754 _Deprecated_("For better performance, consider a reusable charset_encoder")
755#endif
756 inline void strcpy(
757 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
758 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
759 _In_ charset_id charset = charset_id::system)
760 {
761 strcpy(dst, src.data(), src.size(), charset);
762 }
763
774#ifndef _WIN32
775 _Deprecated_("For better performance, consider a reusable charset_encoder")
776#endif
777 inline std::string wstr2str(
778 _In_z_ const wchar_t* src,
779 _In_ charset_id charset = charset_id::system)
780 {
781 std::string dst;
782 strcat(dst, src, SIZE_MAX, charset);
783 return dst;
784 }
785
797#ifndef _WIN32
798 _Deprecated_("For better performance, consider a reusable charset_encoder")
799#endif
800 inline std::string wstr2str(
801 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
802 _In_ charset_id charset = charset_id::system)
803 {
804 std::string dst;
805 strcat(dst, src, count_src, charset);
806 return dst;
807 }
808
819#ifndef _WIN32
820 _Deprecated_("For better performance, consider a reusable charset_encoder")
821#endif
822 inline std::string wstr2str(
823 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
824 _In_ charset_id charset = charset_id::system)
825 {
826 return wstr2str(src.data(), src.size(), charset);
827 }
828
829#ifdef _WIN32
839 template <class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
840 size_t normalizecat(
841 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
842 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src)
843 {
844 count_src = strnlen(src, count_src);
845 size_t count_dst = dst.size();
846 dst.resize(count_dst + count_src);
847 stdex_assert(count_src + 1 < INT_MAX);
848#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpSrcString parameter wrong?
849 int r = NormalizeString(NormalizationC, src, static_cast<int>(count_src), dst.data() + count_dst, static_cast<int>(count_src + 1));
850 if (r >= 0)
851 dst.resize(count_dst + r);
852 else
853#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the _Src parameter wrong?
854 memcpy(dst.data() + count_dst, src, count_src * sizeof(wchar_t));
855 return dst.size();
856 }
857
866 template <size_t N, class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
867 size_t normalizecat(
868 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
869 _In_ const wchar_t (&src)[N])
870 {
871 return normalizecat(dst, src, N);
872 }
873
882 template <class TR_dst = std::char_traits<wchar_t>, class AX_dst = std::allocator<wchar_t>>
883 size_t normalizecat(
884 _Inout_ std::basic_string<wchar_t, TR_dst, AX_dst>& dst,
885 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src)
886 {
887 return normalizecat(dst, src.data(), src.size());
888 }
889
899 template <class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
900 size_t normalize(
901 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
902 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src)
903 {
904 dst.clear();
905 return normalizecat(dst, src, count_src);
906 }
907
916 template <size_t N, class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
917 size_t normalize(
918 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
919 _In_ const wchar_t(&src)[N])
920 {
921 return normalize(dst, src, N);
922 }
923
932 template <class TR_dst = std::char_traits<wchar_t>, class AX_dst = std::allocator<wchar_t>>
933 size_t normalize(
934 _Inout_ std::basic_string<wchar_t, TR_dst, AX_dst>& dst,
935 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src)
936 {
937 return normalize(dst, src.data(), src.size());
938 }
939
948 inline std::wstring normalize(_In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src)
949 {
950 std::wstring dst;
951 normalizecat(dst, src, count_src);
952 return dst;
953 }
954
962 template <size_t N>
963 std::wstring normalize(_In_ const wchar_t(&src)[N])
964 {
965 std::wstring dst;
966 normalizecat(dst, src, N);
967 return dst;
968 }
969
977 inline std::wstring normalize(_In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src)
978 {
979 std::wstring dst;
980 normalizecat(dst, src.data(), src.size());
981 return dst;
982 }
983#endif
984}
985
986#if defined(__GNUC__)
987#pragma GCC diagnostic pop
988#endif
Encoding converter context.
Definition unicode.hpp:139
void strcat(std::basic_string< T_to, TR_to, AX_to > &dst, _In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Convert string and append to string.
Definition unicode.hpp:176
void strcat(std::basic_string< T_to, TR_to, AX_to > &dst, const std::basic_string_view< T_from, std::char_traits< T_from > > src)
Convert string and append to string.
Definition unicode.hpp:341
void strcpy(std::basic_string< T_to, TR_to, AX_to > &dst, const T_from *src)
Convert string.
Definition unicode.hpp:371
void strcat(std::basic_string< T_to, TR_to, AX_to > &dst, const T_from *src)
Convert string and append to string.
Definition unicode.hpp:327
void strcpy(std::basic_string< T_to, TR_to, AX_to > &dst, _In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Convert string.
Definition unicode.hpp:356
std::basic_string< T_to, TR_to, AX_to > convert(const T_from *src)
Return converted string.
Definition unicode.hpp:412
void strcpy(std::basic_string< T_to, TR_to, AX_to > &dst, const std::basic_string_view< T_from, std::char_traits< T_from > > src)
Convert string.
Definition unicode.hpp:385
std::basic_string< T_to, TR_to, AX_to > convert(const std::basic_string_view< T_from, std::char_traits< T_from > > src)
Return converted string.
Definition unicode.hpp:423
std::basic_string< T_to, TR_to, AX_to > convert(_In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Return converted string.
Definition unicode.hpp:399