LibreOffice
LibreOffice 7.2 SDK C/C++ API Reference
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
character.hxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 /*
21  * This file is part of LibreOffice published API.
22  */
23 
24 #ifndef INCLUDED_RTL_CHARACTER_HXX
25 #define INCLUDED_RTL_CHARACTER_HXX
26 
27 #include "sal/config.h"
28 
29 #include <cassert>
30 #include <cstddef>
31 
32 #include "sal/types.h"
33 
34 namespace rtl
35 {
/** Check for Unicode code point.

    @param code  An integer value.
    @return  True if code does not exceed 0x10FFFF.
*/
44 inline SAL_CONSTEXPR bool isUnicodeCodePoint(sal_uInt32 code) { return code <= 0x10FFFF; }
45 
/** Check for ASCII character.

    @param code  A Unicode code point (enforced by an assert).
    @return  True if code is at most 0x7F.
*/
54 inline SAL_CONSTEXPR bool isAscii(sal_uInt32 code)
55 {
56  assert(isUnicodeCodePoint(code));
57  return code <= 0x7F;
58 }
59 
60 #if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
// NOTE(review): presumably to keep negative char values from turning into
// large unsigned values on conversion -- confirm against upstream history.
61 bool isAscii(char) = delete;
62 bool isAscii(signed char) = delete;
// Any other argument type is explicitly converted to sal_uInt32 and forwarded.
63 template <typename T> inline constexpr bool isAscii(T code) { return isAscii(sal_uInt32(code)); }
64 #endif
65 
/** Check for ASCII lower case character.

    @param code  A Unicode code point (enforced by an assert).
    @return  True if code is in the range 'a'..'z'.
*/
75 inline SAL_CONSTEXPR bool isAsciiLowerCase(sal_uInt32 code)
76 {
77  assert(isUnicodeCodePoint(code));
78  return code >= 'a' && code <= 'z';
79 }
80 
81 #if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
82 bool isAsciiLowerCase(char) = delete;
83 bool isAsciiLowerCase(signed char) = delete;
// Any other argument type is explicitly converted to sal_uInt32 and forwarded.
84 template <typename T> inline constexpr bool isAsciiLowerCase(T code)
85 {
86  return isAsciiLowerCase(sal_uInt32(code));
87 }
88 #endif
89 
/** Check for ASCII upper case character.

    @param code  A Unicode code point (enforced by an assert).
    @return  True if code is in the range 'A'..'Z'.
*/
99 inline SAL_CONSTEXPR bool isAsciiUpperCase(sal_uInt32 code)
100 {
101  assert(isUnicodeCodePoint(code));
102  return code >= 'A' && code <= 'Z';
103 }
104 
105 #if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
106 bool isAsciiUpperCase(char) = delete;
107 bool isAsciiUpperCase(signed char) = delete;
// Any other argument type is explicitly converted to sal_uInt32 and forwarded.
108 template <typename T> inline constexpr bool isAsciiUpperCase(T code)
109 {
110  return isAsciiUpperCase(sal_uInt32(code));
111 }
112 #endif
113 
/** Check for ASCII alphabetic character.

    @param code  A Unicode code point (enforced by an assert).
    @return  True if code is an ASCII lower case or ASCII upper case letter.
*/
123 inline SAL_CONSTEXPR bool isAsciiAlpha(sal_uInt32 code)
124 {
125  assert(isUnicodeCodePoint(code));
126  return isAsciiLowerCase(code) || isAsciiUpperCase(code);
127 }
128 
129 #if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
130 bool isAsciiAlpha(char) = delete;
131 bool isAsciiAlpha(signed char) = delete;
// Any other argument type is explicitly converted to sal_uInt32 and forwarded.
132 template <typename T> inline constexpr bool isAsciiAlpha(T code)
133 {
134  return isAsciiAlpha(sal_uInt32(code));
135 }
136 #endif
137 
/** Check for ASCII digit character.

    @param code  A Unicode code point (enforced by an assert).
    @return  True if code is in the range '0'..'9'.
*/
147 inline SAL_CONSTEXPR bool isAsciiDigit(sal_uInt32 code)
148 {
149  assert(isUnicodeCodePoint(code));
150  return code >= '0' && code <= '9';
151 }
152 
153 #if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
154 bool isAsciiDigit(char) = delete;
155 bool isAsciiDigit(signed char) = delete;
// Any other argument type is explicitly converted to sal_uInt32 and forwarded.
156 template <typename T> inline constexpr bool isAsciiDigit(T code)
157 {
158  return isAsciiDigit(sal_uInt32(code));
159 }
160 #endif
161 
/** Check for ASCII alphanumeric character.

    @param code  A Unicode code point (enforced by an assert).
    @return  True if code is an ASCII digit or an ASCII letter of either case.
*/
171 inline SAL_CONSTEXPR bool isAsciiAlphanumeric(sal_uInt32 code)
172 {
173  assert(isUnicodeCodePoint(code));
174  return isAsciiDigit(code) || isAsciiAlpha(code);
175 }
176 
177 #if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
178 bool isAsciiAlphanumeric(char) = delete;
179 bool isAsciiAlphanumeric(signed char) = delete;
// Any other argument type is explicitly converted to sal_uInt32 and forwarded.
180 template <typename T> inline constexpr bool isAsciiAlphanumeric(T code)
181 {
182  return isAsciiAlphanumeric(sal_uInt32(code));
183 }
184 #endif
185 
/** Check for ASCII canonic hexadecimal digit character.

    "Canonic" here means the upper case form only: lower case 'a'..'f' is
    not accepted by this function.

    @param code  A Unicode code point (enforced by an assert).
    @return  True if code is in '0'..'9' or in 'A'..'F'.
*/
195 inline SAL_CONSTEXPR bool isAsciiCanonicHexDigit(sal_uInt32 code)
196 {
197  assert(isUnicodeCodePoint(code));
198  return isAsciiDigit(code) || (code >= 'A' && code <= 'F');
199 }
200 
201 #if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
202 bool isAsciiCanonicHexDigit(char) = delete;
203 bool isAsciiCanonicHexDigit(signed char) = delete;
// Any other argument type is explicitly converted to sal_uInt32 and forwarded.
204 template <typename T> inline constexpr bool isAsciiCanonicHexDigit(T code)
205 {
206  return isAsciiCanonicHexDigit(sal_uInt32(code));
207 }
208 #endif
209 
/** Check for ASCII hexadecimal digit character.

    @param code  A Unicode code point (enforced by an assert).
    @return  True if code is in '0'..'9', 'A'..'F' or 'a'..'f'.
*/
219 inline SAL_CONSTEXPR bool isAsciiHexDigit(sal_uInt32 code)
220 {
221  assert(isUnicodeCodePoint(code));
222  return isAsciiCanonicHexDigit(code) || (code >= 'a' && code <= 'f');
223 }
224 
225 #if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
226 bool isAsciiHexDigit(char) = delete;
227 bool isAsciiHexDigit(signed char) = delete;
// Any other argument type is explicitly converted to sal_uInt32 and forwarded.
228 template <typename T> inline constexpr bool isAsciiHexDigit(T code)
229 {
230  return isAsciiHexDigit(sal_uInt32(code));
231 }
232 #endif
233 
/** Check for ASCII octal digit character.

    @param code  A Unicode code point (enforced by an assert).
    @return  True if code is in the range '0'..'7'.
*/
242 inline SAL_CONSTEXPR bool isAsciiOctalDigit(sal_uInt32 code)
243 {
244  assert(isUnicodeCodePoint(code));
245  return code >= '0' && code <= '7';
246 }
247 
248 #if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
249 bool isAsciiOctalDigit(char) = delete;
250 bool isAsciiOctalDigit(signed char) = delete;
// Any other argument type is explicitly converted to sal_uInt32 and forwarded.
251 template <typename T> inline constexpr bool isAsciiOctalDigit(T code)
252 {
253  return isAsciiOctalDigit(sal_uInt32(code));
254 }
255 #endif
256 
/** Check for ASCII white space character.

    @param code  A Unicode code point (enforced by an assert).
    @return  True if code is one of space, form feed ('\f'), line feed
    ('\n'), carriage return ('\r'), horizontal tab ('\t') or vertical
    tab ('\v').
*/
266 inline SAL_CONSTEXPR bool isAsciiWhiteSpace(sal_uInt32 code)
267 {
268  assert(isUnicodeCodePoint(code));
269  return code == ' ' || code == '\f' || code == '\n' || code == '\r' || code == '\t'
270  || code == '\v';
271 }
272 
273 #if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
274 bool isAsciiWhiteSpace(char) = delete;
275 bool isAsciiWhiteSpace(signed char) = delete;
// Any other argument type is explicitly converted to sal_uInt32 and forwarded.
276 template <typename T> inline constexpr bool isAsciiWhiteSpace(T code)
277 {
278  return isAsciiWhiteSpace(sal_uInt32(code));
279 }
280 #endif
281 
/** Convert a character, if ASCII, to upper case.

    @param code  A Unicode code point (enforced by an assert).
    @return  code - 32 if code is an ASCII lower case letter ('a'..'z');
    otherwise code unchanged.
*/
290 inline SAL_CONSTEXPR sal_uInt32 toAsciiUpperCase(sal_uInt32 code)
291 {
292  assert(isUnicodeCodePoint(code));
293  return isAsciiLowerCase(code) ? code - 32 : code;
294 }
295 
296 #if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
297 sal_uInt32 toAsciiUpperCase(char) = delete;
298 sal_uInt32 toAsciiUpperCase(signed char) = delete;
// Any other argument type is explicitly converted to sal_uInt32 and forwarded;
// the result is sal_uInt32 regardless of T.
299 template <typename T> inline constexpr sal_uInt32 toAsciiUpperCase(T code)
300 {
301  return toAsciiUpperCase(sal_uInt32(code));
302 }
303 #endif
304 
/** Convert a character, if ASCII, to lower case.

    @param code  A Unicode code point (enforced by an assert).
    @return  code + 32 if code is an ASCII upper case letter ('A'..'Z');
    otherwise code unchanged.
*/
313 inline SAL_CONSTEXPR sal_uInt32 toAsciiLowerCase(sal_uInt32 code)
314 {
315  assert(isUnicodeCodePoint(code));
316  return isAsciiUpperCase(code) ? code + 32 : code;
317 }
318 
319 #if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
320 sal_uInt32 toAsciiLowerCase(char) = delete;
321 sal_uInt32 toAsciiLowerCase(signed char) = delete;
// Any other argument type is explicitly converted to sal_uInt32 and forwarded;
// the result is sal_uInt32 regardless of T.
322 template <typename T> inline constexpr sal_uInt32 toAsciiLowerCase(T code)
323 {
324  return toAsciiLowerCase(sal_uInt32(code));
325 }
326 #endif
327 
/** Compare two characters ignoring ASCII case.

    Both arguments are folded with toAsciiLowerCase and then subtracted as
    signed 32-bit values.

    @param code1  A Unicode code point (enforced by an assert).
    @param code2  A Unicode code point (enforced by an assert).
    @return  Zero if the folded values are equal, a negative value if the
    folded code1 is smaller than the folded code2, a positive value otherwise.
*/
340 inline SAL_CONSTEXPR sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
341 {
342  assert(isUnicodeCodePoint(code1));
343  assert(isUnicodeCodePoint(code2));
344  return static_cast<sal_Int32>(toAsciiLowerCase(code1))
345  - static_cast<sal_Int32>(toAsciiLowerCase(code2));
346 }
347 
// Implementation constants: first and last values of the two surrogate
// ranges used by the surrogate helpers in this file
// (high: 0xD800..0xDBFF, low: 0xDC00..0xDFFF).
349 namespace detail
350 {
351 sal_uInt32 const surrogatesHighFirst = 0xD800;
352 sal_uInt32 const surrogatesHighLast = 0xDBFF;
353 sal_uInt32 const surrogatesLowFirst = 0xDC00;
354 sal_uInt32 const surrogatesLowLast = 0xDFFF;
355 }
357 
/** Check for surrogate.

    @param code  A Unicode code point (enforced by an assert).
    @return  True if code lies between detail::surrogatesHighFirst (0xD800)
    and detail::surrogatesLowLast (0xDFFF) inclusive, i.e. is a high or a
    low surrogate.
*/
366 inline SAL_CONSTEXPR bool isSurrogate(sal_uInt32 code)
367 {
368  assert(isUnicodeCodePoint(code));
369  return code >= detail::surrogatesHighFirst && code <= detail::surrogatesLowLast;
370 }
371 
/** Check for high surrogate.

    @param code  A Unicode code point (enforced by an assert).
    @return  True if code lies in 0xD800..0xDBFF inclusive.
*/
380 inline SAL_CONSTEXPR bool isHighSurrogate(sal_uInt32 code)
381 {
382  assert(isUnicodeCodePoint(code));
383  return code >= detail::surrogatesHighFirst && code <= detail::surrogatesHighLast;
384 }
385 
/** Check for low surrogate.

    @param code  A Unicode code point (enforced by an assert).
    @return  True if code lies in 0xDC00..0xDFFF inclusive.
*/
394 inline SAL_CONSTEXPR bool isLowSurrogate(sal_uInt32 code)
395 {
396  assert(isUnicodeCodePoint(code));
397  return code >= detail::surrogatesLowFirst && code <= detail::surrogatesLowLast;
398 }
399 
/** Get high surrogate half of a non-BMP Unicode code point.

    @param code  A Unicode code point that is at least 0x10000 (both
    conditions are enforced by asserts).
    @return  The upper ten bits of (code - 0x10000) combined with 0xD800.
*/
408 inline SAL_CONSTEXPR sal_Unicode getHighSurrogate(sal_uInt32 code)
409 {
410  assert(isUnicodeCodePoint(code));
411  assert(code >= 0x10000);
412  return static_cast<sal_Unicode>(((code - 0x10000) >> 10) | detail::surrogatesHighFirst);
413 }
414 
/** Get low surrogate half of a non-BMP Unicode code point.

    @param code  A Unicode code point that is at least 0x10000 (both
    conditions are enforced by asserts).
    @return  The lower ten bits of (code - 0x10000) combined with 0xDC00.
*/
423 inline SAL_CONSTEXPR sal_Unicode getLowSurrogate(sal_uInt32 code)
424 {
425  assert(isUnicodeCodePoint(code));
426  assert(code >= 0x10000);
427  return static_cast<sal_Unicode>(((code - 0x10000) & 0x3FF) | detail::surrogatesLowFirst);
428 }
429 
/** Combine surrogates to form a code point.

    @param high  A high surrogate (enforced by an assert).
    @param low  A low surrogate (enforced by an assert).
    @return  ((high - 0xD800) << 10) + (low - 0xDC00) + 0x10000.
*/
440 inline SAL_CONSTEXPR sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
441 {
442  assert(isHighSurrogate(high));
443  assert(isLowSurrogate(low));
444  return ((high - detail::surrogatesHighFirst) << 10) + (low - detail::surrogatesLowFirst)
445  + 0x10000;
446 }
447 
/** Split a Unicode code point into UTF-16 code units.

    @param code  A Unicode code point (enforced by an assert).
    @param output  A non-null pointer (enforced by an assert) to the
    destination. output[0] is always written; output[1] is written as well
    when code is 0x10000 or larger, so the destination must have room for
    two units in that case.
    @return  The number of code units written: 1 or 2.
*/
460 inline SAL_CONSTEXPR std::size_t splitSurrogates(sal_uInt32 code, sal_Unicode* output)
461 {
462  assert(isUnicodeCodePoint(code));
463  assert(output != NULL);
464  if (code < 0x10000)
465  {
     // Values below 0x10000 are stored directly as a single unit.
466  output[0] = code;
467  return 1;
468  }
469  else
470  {
     // Larger values are stored as a high/low surrogate pair.
471  output[0] = getHighSurrogate(code);
472  output[1] = getLowSurrogate(code);
473  return 2;
474  }
475 }
476 
/** Check for Unicode scalar value.

    Unlike most functions in this file there is no assert on the argument:
    the range check comes first and short-circuits, so isSurrogate is only
    called for values that pass isUnicodeCodePoint.

    @param code  An integer value.
    @return  True if code is a Unicode code point and not a surrogate.
*/
485 inline SAL_CONSTEXPR bool isUnicodeScalarValue(sal_uInt32 code)
486 {
487  return isUnicodeCodePoint(code) && !isSurrogate(code);
488 }
489 }
490 
491 #endif
492 
493 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
#define SAL_CONSTEXPR
C++11 "constexpr" feature.
Definition: types.h:404
SAL_CONSTEXPR bool isAscii(sal_uInt32 code)
Check for ASCII character.
Definition: character.hxx:54
SAL_CONSTEXPR bool isAsciiAlpha(sal_uInt32 code)
Check for ASCII alphabetic character.
Definition: character.hxx:123
SAL_CONSTEXPR bool isUnicodeCodePoint(sal_uInt32 code)
Check for Unicode code point.
Definition: character.hxx:44
SAL_CONSTEXPR sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
Compare two characters ignoring ASCII case.
Definition: character.hxx:340
SAL_CONSTEXPR bool isUnicodeScalarValue(sal_uInt32 code)
Check for Unicode scalar value.
Definition: character.hxx:485
SAL_CONSTEXPR bool isAsciiUpperCase(sal_uInt32 code)
Check for ASCII upper case character.
Definition: character.hxx:99
SAL_CONSTEXPR std::size_t splitSurrogates(sal_uInt32 code, sal_Unicode *output)
Split a Unicode code point into UTF-16 code units.
Definition: character.hxx:460
SAL_CONSTEXPR bool isSurrogate(sal_uInt32 code)
Check for surrogate.
Definition: character.hxx:366
SAL_CONSTEXPR bool isAsciiDigit(sal_uInt32 code)
Check for ASCII digit character.
Definition: character.hxx:147
SAL_CONSTEXPR sal_Unicode getLowSurrogate(sal_uInt32 code)
Get low surrogate half of a non-BMP Unicode code point.
Definition: character.hxx:423
SAL_CONSTEXPR bool isAsciiLowerCase(sal_uInt32 code)
Check for ASCII lower case character.
Definition: character.hxx:75
SAL_CONSTEXPR bool isAsciiCanonicHexDigit(sal_uInt32 code)
Check for ASCII canonic hexadecimal digit character.
Definition: character.hxx:195
SAL_CONSTEXPR sal_Unicode getHighSurrogate(sal_uInt32 code)
Get high surrogate half of a non-BMP Unicode code point.
Definition: character.hxx:408
SAL_CONSTEXPR sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
Combine surrogates to form a code point.
Definition: character.hxx:440
SAL_CONSTEXPR bool isLowSurrogate(sal_uInt32 code)
Check for low surrogate.
Definition: character.hxx:394
SAL_CONSTEXPR bool isAsciiOctalDigit(sal_uInt32 code)
Check for ASCII octal digit character.
Definition: character.hxx:242
SAL_CONSTEXPR bool isAsciiWhiteSpace(sal_uInt32 code)
Check for ASCII white space character.
Definition: character.hxx:266
SAL_CONSTEXPR sal_uInt32 toAsciiUpperCase(sal_uInt32 code)
Convert a character, if ASCII, to upper case.
Definition: character.hxx:290
SAL_CONSTEXPR bool isHighSurrogate(sal_uInt32 code)
Check for high surrogate.
Definition: character.hxx:380
SAL_CONSTEXPR bool isAsciiAlphanumeric(sal_uInt32 code)
Check for ASCII alphanumeric character.
Definition: character.hxx:171
SAL_CONSTEXPR sal_uInt32 toAsciiLowerCase(sal_uInt32 code)
Convert a character, if ASCII, to lower case.
Definition: character.hxx:313
sal_uInt16 sal_Unicode
Definition: types.h:123
SAL_CONSTEXPR bool isAsciiHexDigit(sal_uInt32 code)
Check for ASCII hexadecimal digit character.
Definition: character.hxx:219