CodePorting.Translator Cs2Cpp
CodePorting.Translator.Cs2Cpp.Framework
char.h
1
3#pragma once
4
5#include "fwd.h"
6#include "system/globalization/unicode_category.h"
7#include "system/primitive_types.h"
8#include "system/globalization/culture_info.h"
9#include <cctype>
10
11namespace System {
12
16class Char
17{
18 RTTI_INFO_VALUE_TYPE(Char)
19public:
20
25 {
27 }
28
32 static bool IsSurrogate(char_t c)
33 {
35 }
36
41 static ASPOSECPP_SHARED_API bool IsSurrogate(const String& s, int index);
42
/// Determines whether the two specified UTF-16 code units form a surrogate pair.
/// @param highSurrogate Code unit tested against the high-surrogate range [0xD800, 0xDBFF].
/// @param lowSurrogate Code unit tested against the low-surrogate range [0xDC00, 0xDFFF].
/// @returns True if both code units lie inside their respective ranges, otherwise false.
static bool IsSurrogatePair(char_t highSurrogate, char_t lowSurrogate)
{
    const auto high = static_cast<uint32_t>(highSurrogate);
    const auto low = static_cast<uint32_t>(lowSurrogate);

    const bool highInRange = high >= 0xD800 && high <= 0xDBFF;
    const bool lowInRange = low >= 0xDC00 && low <= 0xDFFF;
    return highInRange && lowInRange;
}
52
57 static ASPOSECPP_SHARED_API bool IsSurrogatePair(const String& str, int index);
58
63 static ASPOSECPP_SHARED_API bool IsHighSurrogate(const String& s, int index);
64
69 static bool IsHighSurrogate(const char_t* str, int idx) { return IsHighSurrogate(*(str+idx)); }
70
/// Determines whether the specified character is a high surrogate.
/// @param c UTF-16 code unit to test.
/// @returns True if c lies in the range [0xD800, 0xDBFF], otherwise false.
static bool IsHighSurrogate(char_t c)
{
    const auto codeUnit = static_cast<uint32_t>(c);
    return codeUnit >= 0xD800 && codeUnit <= 0xDBFF;
}
78
83 static bool IsLowSurrogate(const char_t* str, int idx) { return IsLowSurrogate(*(str+idx)); }
84
/// Determines whether the specified character is a low surrogate.
/// @param c UTF-16 code unit to test.
/// @returns True if c lies in the range [0xDC00, 0xDFFF], otherwise false.
static bool IsLowSurrogate(char_t c)
{
    const auto codeUnit = static_cast<uint32_t>(c);
    return codeUnit >= 0xDC00 && codeUnit <= 0xDFFF;
}
92
97 static bool IsControl(const char_t* str, int idx) { return IsControl(*(str + idx)); }
98
102 static bool IsControl(char_t c)
103 {
105 }
106
111 static bool IsDigit(const char_t* str, int idx) { return IsDigit(*(str + idx)); }
112
117 static bool IsDigit(const String& str, const int32_t idx) { return IsDigit(str[idx]); }
118
122 static bool IsDigit(char_t c)
123 {
125 }
126
131 static bool IsLetter(const char_t* str, int idx) { return IsLetter(*(str + idx)); }
132
136 static bool IsLetter(char_t c)
137 {
139 }
140
145 static bool IsLetterOrDigit(const char_t* str, int idx) { return IsLetterOrDigit(*(str + idx)); }
146
150 static bool IsLetterOrDigit(char_t c)
151 {
152 int category = s_category_table[c];
153 return (category <= ((int)Globalization::UnicodeCategory::OtherLetter) ||
155 }
156
161 static bool IsNumber(const char_t* str, int idx) { return IsNumber(*(str + idx)); }
162
166 static bool IsNumber(char_t c)
167 {
168 uint8_t category = s_category_table[c];
169 return category >= (uint8_t)Globalization::UnicodeCategory::DecimalDigitNumber &&
171 }
172
177 static bool IsSeparator(const char_t* str, int idx) { return IsSeparator(*(str + idx)); }
178
182 static bool IsSeparator(char_t c)
183 {
184 uint8_t category = s_category_table[c];
185 return category >= (uint8_t)Globalization::UnicodeCategory::SpaceSeparator &&
187 }
188
193 static bool IsSymbol(const char_t* str, int idx) { return IsSymbol(*(str + idx)); }
194
198 static bool IsSymbol(char_t c)
199 {
200 uint8_t category = s_category_table[c];
201 return category >= (uint8_t)Globalization::UnicodeCategory::MathSymbol &&
203 }
204
209 static bool IsWhiteSpace(const char_t* str, int idx) { return IsWhiteSpace(*(str + idx)); }
210
214 static bool IsWhiteSpace(char_t c)
215 {
216 if (c < 0x1680)
217 return c == 0x20 || (c >= 0x09 && c <= 0x0d) || c == 0x85 || c == 0xA0;
218
219 uint8_t category = s_category_table[c];
220 return category > (uint8_t)Globalization::UnicodeCategory::OtherNumber &&
222 }
223
229 static ASPOSECPP_SHARED_API bool IsWhiteSpace(const String& str, int index);
230
/// Determines whether the specified character is an ASCII white space character.
/// Only the space (0x20) and the contiguous range 0x09..0x0D are accepted;
/// 0x85 and 0xA0, which IsWhiteSpace(char_t) also accepts, are not.
/// @param c Character to test.
/// @returns True if c is 0x20 or lies in [0x09, 0x0D], otherwise false.
static constexpr bool IsAsciiWhiteSpace(char_t c)
{
    // Kept as a single return statement so the function stays a valid C++11 constexpr.
    return (c >= 0x09 && c <= 0x0d) || c == 0x20;
}
238
245 static ASPOSECPP_SHARED_API bool IsUpper(const String& str, int idx);
246
251 static bool IsUpper(const char_t* str, int idx) { return IsUpper(*(str + idx)); }
252
256 static bool IsUpper(char_t c)
257 {
259 }
260
265 static bool IsLower(const char_t* str, int idx) { return IsLower(*(str + idx)); }
266
270 static bool IsLower(char_t c)
271 {
273 }
274
281 static ASPOSECPP_SHARED_API bool IsLower(const String& str, int idx);
282
287 static bool IsPunctuation(const char_t* str, int idx) { return IsPunctuation(*(str + idx)); }
288
292 static bool IsPunctuation(char_t c)
293 {
294 uint8_t category = s_category_table[c];
295 return category >= (uint8_t)Globalization::UnicodeCategory::ConnectorPunctuation &&
297 }
298
302 static char_t ToUpper(char_t c)
303 {
305 }
306
310 static char_t ToLower(char_t c)
311 {
313 }
314
319 static char_t ToUpper(char_t c, const SharedPtr<Globalization::CultureInfo>& culture)
320 {
321 if (culture == nullptr)
322 {
323 throw ArgumentNullException(u"culture");
324 }
325 return culture->get_TextInfo()->ToUpper(c);
326 }
327
332 static char_t ToLower(char_t c, const SharedPtr<Globalization::CultureInfo>& culture)
333 {
334 if (culture == nullptr)
335 {
336 throw ArgumentNullException(u"culture");
337 }
338
339 return culture->get_TextInfo()->ToLower(c);
340 }
341
345 static char_t ToUpperInvariant(char_t c)
346 {
348 }
349
353 static char_t ToLowerInvariant(char_t c)
354 {
356 }
357
361 static String ConvertFromUtf32(uint32_t utf32)
362 {
363 if (((utf32 > 0x0010FFFF)) || ((utf32 >= 0xD800) && (utf32 <= 0xDFFF)))
364 {
365 throw ArgumentOutOfRangeException(u"ArgumentOutOfRange_InvalidUTF32");
366 }
367
368 return String::FromUtf32(&utf32, 1);
369 }
370
375 static int ConvertToUtf32(char_t highSurrogate, char_t lowSurrogate)
376 {
377 if (!Char::IsHighSurrogate(highSurrogate))
378 {
379 throw ArgumentOutOfRangeException(u"ArgumentOutOfRange_InvalidHighSurrogate");
380 }
381
382 if (!Char::IsLowSurrogate(lowSurrogate))
383 {
384 throw ArgumentOutOfRangeException(u"ArgumentOutOfRange_InvalidLowSurrogate");
385 }
386
387 return (int)(((highSurrogate -0xD800) * 0x0400) + (lowSurrogate - 0xDC00)) + 0x010000;
388 }
389
395 static ASPOSECPP_SHARED_API int ConvertToUtf32(const String& s, int index);
396
401 static bool TryParse(const System::String &s, char_t &result)
402 {
403 if (s.get_Length() != 1)
404 return false;
405
406 result = s[0];
407 return true;
408 }
409
413 static ASPOSECPP_SHARED_API double GetNumericValue(char_t c);
414
418 static ASPOSECPP_SHARED_API char_t Parse(const String& value);
419
420protected:
421
422#ifdef DISABLE_ASTRAL
424 static const uint8_t s_category_table[0xFFFF];
425#else
427 static const uint8_t ASPOSECPP_SHARED_API s_category_table[71680];
429 static const uint32_t ASPOSECPP_SHARED_API s_category_table_astral_index[4096];
430#endif
431};
432
433}
Provides methods for manipulation of characters represented as UTF-16 code units. This is a static ty...
Definition: char.h:17
static bool IsSeparator(const char_t *str, int idx)
Determines whether the character at the specified index in the specified character buffer is classifi...
Definition: char.h:177
static bool IsUpper(const String &str, int idx)
Determines whether the character at the specified index in the specified string is classified as an u...
static bool IsControl(const char_t *str, int idx)
Determines whether the character at the specified index in the specified character buffer is classifi...
Definition: char.h:97
static char_t ToLower(char_t c, const SharedPtr< Globalization::CultureInfo > &culture)
Converts the specified character to lower case.
Definition: char.h:332
static int ConvertToUtf32(const String &s, int index)
Converts the value of a UTF-16 encoded character or surrogate pair at a specified position in a strin...
static const uint8_t s_category_table[71680]
Table that maps Unicode characters to the Unicode categories.
Definition: char.h:427
static bool IsLower(char_t c)
Determines whether the specified character is classified as a lower case letter.
Definition: char.h:270
static const uint32_t s_category_table_astral_index[4096]
Table that maps Unicode characters to the Unicode astral plane indexes.
Definition: char.h:429
static bool IsSeparator(char_t c)
Determines whether the specified character is classified as a separator character.
Definition: char.h:182
static constexpr bool IsAsciiWhiteSpace(char_t c)
Determines whether the specified character is classified as an ASCII white space character.
Definition: char.h:234
static bool IsLower(const char_t *str, int idx)
Determines whether the character at the specified index in the specified character buffer is classifi...
Definition: char.h:265
static bool IsWhiteSpace(char_t c)
Determines whether the specified character is classified as a white space character.
Definition: char.h:214
static bool IsSurrogate(const String &s, int index)
Determines whether the character at the specified index in the specified string is UTF-16 surrogate c...
static bool IsDigit(char_t c)
Determines whether the specified character is classified as a decimal digit.
Definition: char.h:122
static String ConvertFromUtf32(uint32_t utf32)
Converts UTF-32 code unit into an instance of System::String class.
Definition: char.h:361
static bool IsSymbol(char_t c)
Determines whether the specified character is classified as a symbol character.
Definition: char.h:198
static bool IsLetter(const char_t *str, int idx)
Determines whether the character at the specified index in the specified character buffer is classifi...
Definition: char.h:131
static double GetNumericValue(char_t c)
Converts the specified UTF-16 character into double-precision floating-point numerical value.
static bool IsDigit(const char_t *str, int idx)
Determines whether the character at the specified index in the specified character buffer is classifi...
Definition: char.h:111
static bool IsDigit(const String &str, const int32_t idx)
Determines whether the character at the specified index in the specified string is classified as a de...
Definition: char.h:117
static Globalization::UnicodeCategory GetUnicodeCategory(char_t ch)
Returns a value that represents a Unicode category of specified character.
Definition: char.h:24
static char_t ToLowerInvariant(char_t c)
Converts the specified character to lower case.
Definition: char.h:353
static bool IsHighSurrogate(const char_t *str, int idx)
Determines whether the character at the specified index in the specified character buffer is a high s...
Definition: char.h:69
static bool IsNumber(char_t c)
Determines whether the specified character is classified as a number.
Definition: char.h:166
static bool TryParse(const System::String &s, char_t &result)
Tries to convert a string consisting of a single character into UTF-16 character. The function succee...
Definition: char.h:401
static bool IsSurrogatePair(const String &str, int index)
Determines whether two consecutive characters in the specified character buffer are a surrogate pair.
static bool IsPunctuation(char_t c)
Determines whether the specified character is classified as a punctuation character.
Definition: char.h:292
static bool IsWhiteSpace(const String &str, int index)
Determines whether the character at the specified index in the specified string is classified as a wh...
static char_t ToUpperInvariant(char_t c)
Converts the specified character to upper case.
Definition: char.h:345
static bool IsWhiteSpace(const char_t *str, int idx)
Determines whether the character at the specified index in the specified character buffer is classifi...
Definition: char.h:209
static bool IsNumber(const char_t *str, int idx)
Determines whether the character at the specified index in the specified character buffer is classifi...
Definition: char.h:161
static bool IsUpper(char_t c)
Determines whether the specified character is classified as an upper case letter.
Definition: char.h:256
static bool IsLetterOrDigit(char_t c)
Determines whether the specified character is classified as Unicode letter or a decimal digit.
Definition: char.h:150
static int ConvertToUtf32(char_t highSurrogate, char_t lowSurrogate)
Converts the specified UTF-16 surrogate pair into UTF-32 code unit.
Definition: char.h:375
static char_t ToLower(char_t c)
Converts the specified character to lower case.
Definition: char.h:310
static bool IsLower(const String &str, int idx)
Determines whether the character at the specified index in the specified string is classified as a lo...
static bool IsPunctuation(const char_t *str, int idx)
Determines whether the character at the specified index in the specified character buffer is classifi...
Definition: char.h:287
static bool IsLowSurrogate(char_t c)
Determines whether the specified character is a low surrogate.
Definition: char.h:88
static bool IsLetterOrDigit(const char_t *str, int idx)
Determines whether the character at the specified index in the specified character buffer is classifi...
Definition: char.h:145
static char_t Parse(const String &value)
Converts the first and the only character of the specified string to a char_t value.
static bool IsLowSurrogate(const char_t *str, int idx)
Determines whether the character at the specified index in the specified character buffer is a low su...
Definition: char.h:83
static bool IsSurrogate(char_t c)
Determines if the specified character is a UTF-16 surrogate code unit.
Definition: char.h:32
static char_t ToUpper(char_t c)
Converts the specified character to upper case.
Definition: char.h:302
static char_t ToUpper(char_t c, const SharedPtr< Globalization::CultureInfo > &culture)
Converts the specified character to upper case.
Definition: char.h:319
static bool IsHighSurrogate(char_t c)
Determines whether the specified character is a high surrogate.
Definition: char.h:74
static bool IsUpper(const char_t *str, int idx)
Determines whether the character at the specified index in the specified character buffer is classifi...
Definition: char.h:251
static bool IsSymbol(const char_t *str, int idx)
Determines whether the character at the specified index in the specified character buffer is classifi...
Definition: char.h:193
static bool IsControl(char_t c)
Determines whether the specified character is classified as Unicode control character.
Definition: char.h:102
static bool IsSurrogatePair(char_t highSurrogate, char_t lowSurrogate)
Determines whether the two specified characters form a UTF-16 surrogate pair.
Definition: char.h:47
static bool IsLetter(char_t c)
Determines whether the specified character is classified as Unicode letter.
Definition: char.h:136
static bool IsHighSurrogate(const String &s, int index)
Determines whether the character at the specified index in the specified string is UTF-16 high surrog...
static const CultureInfoPtr & get_CurrentCulture()
Gets culture set for current thread.
static const CultureInfoPtr & get_InvariantCulture()
Gets invariant culture.
Pointer class to wrap types being allocated on heap. Use it to manage memory for classes inheriting O...
Definition: smart_ptr.h:180
String class used across the library. Is a substitute for C# System.String when translating code....
Definition: string.h:122
int get_Length() const
Gets string length.
static String FromUtf32(const uint32_t *utf32, int32_t length)
Creates String from utf32 string.
UnicodeCategory
Category of unicode character.
Definition: unicode_category.h:10
@ ParagraphSeparator
Paragraph separator.
@ OtherPunctuation
Other punctuation sign.
@ ConnectorPunctuation
Connector punctuation sign.
Definition: db_command.h:9