LibreOffice
LibreOffice 5.3 SDK C/C++ API Reference
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
character.hxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #ifndef INCLUDED_RTL_CHARACTER_HXX
21 #define INCLUDED_RTL_CHARACTER_HXX
22 
23 #include <sal/config.h>
24 
25 #include <cassert>
26 #include <cstddef>
27 
28 #include <sal/types.h>
29 
30 namespace rtl
31 {
32 
41 inline bool isUnicodeCodePoint(sal_uInt32 code)
42 {
43  return code <= 0x10FFFF;
44 }
45 
54 inline bool isAscii(sal_uInt32 code)
55 {
56  assert(isUnicodeCodePoint(code));
57  return code <= 0x7F;
58 }
59 
69 inline bool isAsciiLowerCase(sal_uInt32 code)
70 {
71  assert(isUnicodeCodePoint(code));
72  return code >= 'a' && code <= 'z';
73 }
74 
84 inline bool isAsciiUpperCase(sal_uInt32 code)
85 {
86  assert(isUnicodeCodePoint(code));
87  return code >= 'A' && code <= 'Z';
88 }
89 
99 inline bool isAsciiAlpha(sal_uInt32 code)
100 {
101  assert(isUnicodeCodePoint(code));
102  return isAsciiLowerCase(code) || isAsciiUpperCase(code);
103 }
104 
114 inline bool isAsciiDigit(sal_uInt32 code)
115 {
116  assert(isUnicodeCodePoint(code));
117  return code >= '0' && code <= '9';
118 }
119 
129 inline bool isAsciiAlphanumeric(sal_uInt32 code)
130 {
131  assert(isUnicodeCodePoint(code));
132  return isAsciiDigit(code) || isAsciiAlpha(code);
133 }
134 
144 inline bool isAsciiCanonicHexDigit(sal_uInt32 code)
145 {
146  assert(isUnicodeCodePoint(code));
147  return isAsciiDigit(code) || (code >= 'A' && code <= 'F');
148 }
149 
159 inline bool isAsciiHexDigit(sal_uInt32 code)
160 {
161  assert(isUnicodeCodePoint(code));
162  return isAsciiCanonicHexDigit(code) || (code >= 'a' && code <= 'f');
163 }
164 
173 inline bool isAsciiOctalDigit(sal_uInt32 code)
174 {
175  assert(isUnicodeCodePoint(code));
176  return code >= '0' && code <= '7';
177 }
178 
179 
188 inline sal_uInt32 toAsciiUpperCase(sal_uInt32 code)
189 {
190  assert(isUnicodeCodePoint(code));
191  return isAsciiLowerCase(code) ? code - 32 : code;
192 }
193 
202 inline sal_uInt32 toAsciiLowerCase(sal_uInt32 code)
203 {
204  assert(isUnicodeCodePoint(code));
205  return isAsciiUpperCase(code) ? code + 32 : code;
206 }
207 
220 inline sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
221 {
222  assert(isUnicodeCodePoint(code1));
223  assert(isUnicodeCodePoint(code2));
224  return static_cast<sal_Int32>(toAsciiLowerCase(code1))
225  - static_cast<sal_Int32>(toAsciiLowerCase(code2));
226 }
227 
229 namespace detail {
230 
231 sal_uInt32 const surrogatesHighFirst = 0xD800;
232 sal_uInt32 const surrogatesHighLast = 0xDBFF;
233 sal_uInt32 const surrogatesLowFirst = 0xDC00;
234 sal_uInt32 const surrogatesLowLast = 0xDFFF;
235 
236 }
238 
247 inline bool isHighSurrogate(sal_uInt32 code) {
248  assert(isUnicodeCodePoint(code));
249  return code >= detail::surrogatesHighFirst
250  && code <= detail::surrogatesHighLast;
251 }
252 
261 inline bool isLowSurrogate(sal_uInt32 code) {
262  assert(isUnicodeCodePoint(code));
263  return code >= detail::surrogatesLowFirst
264  && code <= detail::surrogatesLowLast;
265 }
266 
275 inline sal_Unicode getHighSurrogate(sal_uInt32 code) {
276  assert(isUnicodeCodePoint(code));
277  assert(code >= 0x10000);
278  return static_cast<sal_Unicode>(((code - 0x10000) >> 10) | detail::surrogatesHighFirst);
279 }
280 
289 inline sal_Unicode getLowSurrogate(sal_uInt32 code) {
290  assert(isUnicodeCodePoint(code));
291  assert(code >= 0x10000);
292  return static_cast<sal_Unicode>(((code - 0x10000) & 0x3FF) | detail::surrogatesLowFirst);
293 }
294 
305 inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low) {
306  assert(isHighSurrogate(high));
307  assert(isLowSurrogate(low));
308  return ((high - detail::surrogatesHighFirst) << 10)
309  + (low - detail::surrogatesLowFirst) + 0x10000;
310 }
311 
324 inline std::size_t splitSurrogates(sal_uInt32 code, sal_Unicode * output) {
325  assert(isUnicodeCodePoint(code));
326  assert(output != NULL);
327  if (code < 0x10000) {
328  output[0] = code;
329  return 1;
330  } else {
331  output[0] = getHighSurrogate(code);
332  output[1] = getLowSurrogate(code);
333  return 2;
334  }
335 }
336 
337 }
338 
339 #endif
340 
341 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
bool isAsciiHexDigit(sal_uInt32 code)
Check for ASCII hexadecimal digit character.
Definition: character.hxx:159
bool isUnicodeCodePoint(sal_uInt32 code)
Check for Unicode code point.
Definition: character.hxx:41
bool isAsciiDigit(sal_uInt32 code)
Check for ASCII digit character.
Definition: character.hxx:114
bool isAsciiAlpha(sal_uInt32 code)
Check for ASCII alphabetic character.
Definition: character.hxx:99
bool isAsciiOctalDigit(sal_uInt32 code)
Check for ASCII octal digit character.
Definition: character.hxx:173
sal_uInt32 toAsciiUpperCase(sal_uInt32 code)
Convert a character, if ASCII, to upper case.
Definition: character.hxx:188
bool isAscii(sal_uInt32 code)
Check for ASCII character.
Definition: character.hxx:54
bool isAsciiCanonicHexDigit(sal_uInt32 code)
Check for ASCII canonic hexadecimal digit character.
Definition: character.hxx:144
bool isLowSurrogate(sal_uInt32 code)
Check for low surrogate.
Definition: character.hxx:261
std::size_t splitSurrogates(sal_uInt32 code, sal_Unicode *output)
Split a Unicode code point into UTF-16 code units.
Definition: character.hxx:324
bool isAsciiAlphanumeric(sal_uInt32 code)
Check for ASCII alphanumeric character.
Definition: character.hxx:129
bool isAsciiUpperCase(sal_uInt32 code)
Check for ASCII upper case character.
Definition: character.hxx:84
bool isAsciiLowerCase(sal_uInt32 code)
Check for ASCII lower case character.
Definition: character.hxx:69
sal_uInt32 toAsciiLowerCase(sal_uInt32 code)
Convert a character, if ASCII, to lower case.
Definition: character.hxx:202
sal_uInt16 sal_Unicode
Definition: types.h:155
sal_Unicode getLowSurrogate(sal_uInt32 code)
Get low surrogate half of a non-BMP Unicode code point.
Definition: character.hxx:289
sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
Combine surrogates to form a code point.
Definition: character.hxx:305
sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
Compare two characters ignoring ASCII case.
Definition: character.hxx:220
sal_Unicode getHighSurrogate(sal_uInt32 code)
Get high surrogate half of a non-BMP Unicode code point.
Definition: character.hxx:275
bool isHighSurrogate(sal_uInt32 code)
Check for high surrogate.
Definition: character.hxx:247