mirror of
https://git.dev.opencascade.org/repos/occt.git
synced 2025-04-05 18:16:23 +03:00
337 lines
13 KiB
Plaintext
Executable File
337 lines
13 KiB
Plaintext
Executable File
// Created on: 2013-01-28
|
|
// Created by: Kirill GAVRILOV
|
|
// Copyright (c) 2013-2014 OPEN CASCADE SAS
|
|
//
|
|
// This file is part of Open CASCADE Technology software library.
|
|
//
|
|
// This library is free software; you can redistribute it and/or modify it under
|
|
// the terms of the GNU Lesser General Public License version 2.1 as published
|
|
// by the Free Software Foundation, with special exception defined in the file
|
|
// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
|
|
// distribution for complete text of the license and disclaimer of any warranty.
|
|
//
|
|
// Alternatively, this file may be used under the terms of Open CASCADE
|
|
// commercial license or contractual agreement.
|
|
|
|
// Portions of code are copyrighted by Unicode, Inc.
|
|
//
|
|
// Copyright © 2001-2004 Unicode, Inc.
|
|
//
|
|
// Disclaimer
|
|
//
|
|
// This source code is provided as is by Unicode, Inc. No claims are
|
|
// made as to fitness for any particular purpose. No warranties of any
|
|
// kind are expressed or implied. The recipient agrees to determine
|
|
// applicability of information provided. If this file has been
|
|
// purchased on magnetic or optical media from Unicode, Inc., the
|
|
// sole remedy for any claim will be exchange of defective media
|
|
// within 90 days of receipt.
|
|
//
|
|
// Limitations on Rights to Redistribute This Code
|
|
//
|
|
// Unicode, Inc. hereby grants the right to freely use the information
|
|
// supplied in this file in the creation of products supporting the
|
|
// Unicode Standard, and to make copies of this file in any form
|
|
// for internal or external distribution as long as this notice
|
|
// remains attached.
|
|
|
|
//! The first character in a UTF-8 sequence indicates how many bytes
|
|
//! to read (among other things).
|
|
template<typename Type>
|
|
const unsigned char NCollection_UtfIterator<Type>::UTF8_BYTES_MINUS_ONE[256] =
|
|
{
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
|
|
};
|
|
|
|
//! Magic values subtracted from a buffer value during UTF-8 conversion.
|
|
//! This table contains as many values as there might be trailing bytes
|
|
//! in a UTF-8 sequence.
|
|
template<typename Type>
|
|
const unsigned long NCollection_UtfIterator<Type>::offsetsFromUTF8[6] =
|
|
{
|
|
0x00000000UL, 0x00003080UL, 0x000E2080UL,
|
|
0x03C82080UL, 0xFA082080UL, 0x82082080UL
|
|
};
|
|
|
|
//! The first character in a UTF-8 sequence indicates how many bytes to read.
|
|
template<typename Type>
|
|
const unsigned char NCollection_UtfIterator<Type>::UTF8_FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
|
|
|
|
// =======================================================================
|
|
// function : readUTF8
|
|
// purpose : Get a UTF-8 character; leave the tracking pointer at the start of the next character.
|
|
// Not protected against invalid UTF-8.
|
|
// =======================================================================
|
|
template<typename Type>
|
|
inline void NCollection_UtfIterator<Type>::readUTF8()
|
|
{
|
|
// unsigned arithmetic used
|
|
Standard_Utf8UChar* aPos = (Standard_Utf8UChar* )myPosNext;
|
|
const unsigned char aBytesToRead = UTF8_BYTES_MINUS_ONE[*aPos];
|
|
myCharUtf32 = 0;
|
|
switch (aBytesToRead)
|
|
{
|
|
case 5: myCharUtf32 += *aPos++; myCharUtf32 <<= 6; // remember, illegal UTF-8
|
|
case 4: myCharUtf32 += *aPos++; myCharUtf32 <<= 6; // remember, illegal UTF-8
|
|
case 3: myCharUtf32 += *aPos++; myCharUtf32 <<= 6;
|
|
case 2: myCharUtf32 += *aPos++; myCharUtf32 <<= 6;
|
|
case 1: myCharUtf32 += *aPos++; myCharUtf32 <<= 6;
|
|
case 0: myCharUtf32 += *aPos++;
|
|
}
|
|
myCharUtf32 -= offsetsFromUTF8[aBytesToRead];
|
|
myPosNext = (Type* )aPos;
|
|
}
|
|
|
|
// magic numbers
|
|
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF8_BYTE_MASK = 0xBF;
|
|
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF8_BYTE_MARK = 0x80;
|
|
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_HIGH_START = 0xD800;
|
|
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_HIGH_END = 0xDBFF;
|
|
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_LOW_START = 0xDC00;
|
|
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_LOW_END = 0xDFFF;
|
|
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_HIGH_SHIFT = 10;
|
|
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_LOW_BASE = 0x0010000UL;
|
|
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_LOW_MASK = 0x3FFUL;
|
|
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF32_MAX_BMP = 0x0000FFFFUL;
|
|
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF32_MAX_LEGAL = 0x0010FFFFUL;
|
|
|
|
// =======================================================================
|
|
// function : readUTF16
|
|
// purpose :
|
|
// =======================================================================
|
|
template<typename Type> inline
|
|
void NCollection_UtfIterator<Type>::readUTF16()
|
|
{
|
|
Standard_Utf32Char aChar = *myPosNext++;
|
|
// if we have the first half of the surrogate pair
|
|
if (aChar >= UTF16_SURROGATE_HIGH_START
|
|
&& aChar <= UTF16_SURROGATE_HIGH_END)
|
|
{
|
|
const Standard_Utf32Char aChar2 = *myPosNext;
|
|
// complete the surrogate pair
|
|
if (aChar2 >= UTF16_SURROGATE_LOW_START
|
|
&& aChar2 <= UTF16_SURROGATE_LOW_END)
|
|
{
|
|
aChar = ((aChar - UTF16_SURROGATE_HIGH_START) << UTF16_SURROGATE_HIGH_SHIFT)
|
|
+ (aChar2 - UTF16_SURROGATE_LOW_START) + UTF16_SURROGATE_LOW_BASE;
|
|
++myPosNext;
|
|
}
|
|
}
|
|
myCharUtf32 = aChar;
|
|
}
|
|
|
|
// =======================================================================
|
|
// function : AdvanceBytesUtf8
|
|
// purpose :
|
|
// =======================================================================
|
|
template<typename Type> inline
|
|
Standard_Integer NCollection_UtfIterator<Type>::AdvanceBytesUtf8() const
|
|
{
|
|
if (myCharUtf32 >= UTF16_SURROGATE_HIGH_START
|
|
&& myCharUtf32 <= UTF16_SURROGATE_LOW_END)
|
|
{
|
|
// UTF-16 surrogate values are illegal in UTF-32
|
|
return 0;
|
|
}
|
|
else if (myCharUtf32 < Standard_Utf32Char(0x80))
|
|
{
|
|
return 1;
|
|
}
|
|
else if (myCharUtf32 < Standard_Utf32Char(0x800))
|
|
{
|
|
return 2;
|
|
}
|
|
else if (myCharUtf32 < Standard_Utf32Char(0x10000))
|
|
{
|
|
return 3;
|
|
}
|
|
else if (myCharUtf32 <= UTF32_MAX_LEGAL)
|
|
{
|
|
return 4;
|
|
}
|
|
else
|
|
{
|
|
// illegal
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
// =======================================================================
|
|
// function : GetUtf8
|
|
// purpose :
|
|
// =======================================================================
|
|
template<typename Type> inline
|
|
Standard_Utf8Char* NCollection_UtfIterator<Type>::GetUtf8 (Standard_Utf8Char* theBuffer) const
|
|
{
|
|
// unsigned arithmetic used
|
|
return (Standard_Utf8Char* )GetUtf8 ((Standard_Utf8UChar* )theBuffer);
|
|
}
|
|
|
|
// =======================================================================
|
|
// function : GetUtf8
|
|
// purpose :
|
|
// =======================================================================
|
|
template<typename Type> inline
|
|
Standard_Utf8UChar* NCollection_UtfIterator<Type>::GetUtf8 (Standard_Utf8UChar* theBuffer) const
|
|
{
|
|
Standard_Utf32Char aChar = myCharUtf32;
|
|
if (myCharUtf32 >= UTF16_SURROGATE_HIGH_START
|
|
&& myCharUtf32 <= UTF16_SURROGATE_LOW_END)
|
|
{
|
|
// UTF-16 surrogate values are illegal in UTF-32
|
|
return theBuffer;
|
|
}
|
|
else if (myCharUtf32 < Standard_Utf32Char(0x80))
|
|
{
|
|
*theBuffer++ = Standard_Utf8UChar (aChar | UTF8_FIRST_BYTE_MARK[1]);
|
|
return theBuffer;
|
|
}
|
|
else if (myCharUtf32 < Standard_Utf32Char(0x800))
|
|
{
|
|
*++theBuffer = Standard_Utf8UChar((aChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK); aChar >>= 6;
|
|
*--theBuffer = Standard_Utf8UChar (aChar | UTF8_FIRST_BYTE_MARK[2]);
|
|
return theBuffer + 2;
|
|
}
|
|
else if (myCharUtf32 < Standard_Utf32Char(0x10000))
|
|
{
|
|
theBuffer += 3;
|
|
*--theBuffer = Standard_Utf8UChar((aChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK); aChar >>= 6;
|
|
*--theBuffer = Standard_Utf8UChar((aChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK); aChar >>= 6;
|
|
*--theBuffer = Standard_Utf8UChar (aChar | UTF8_FIRST_BYTE_MARK[3]);
|
|
return theBuffer + 3;
|
|
}
|
|
else if (myCharUtf32 <= UTF32_MAX_LEGAL)
|
|
{
|
|
theBuffer += 4;
|
|
*--theBuffer = Standard_Utf8UChar((aChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK); aChar >>= 6;
|
|
*--theBuffer = Standard_Utf8UChar((aChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK); aChar >>= 6;
|
|
*--theBuffer = Standard_Utf8UChar((aChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK); aChar >>= 6;
|
|
*--theBuffer = Standard_Utf8UChar (aChar | UTF8_FIRST_BYTE_MARK[4]);
|
|
return theBuffer + 4;
|
|
}
|
|
else
|
|
{
|
|
// illegal
|
|
return theBuffer;
|
|
}
|
|
}
|
|
|
|
// =======================================================================
|
|
// function : AdvanceBytesUtf16
|
|
// purpose :
|
|
// =======================================================================
|
|
template<typename Type> inline
|
|
Standard_Integer NCollection_UtfIterator<Type>::AdvanceBytesUtf16() const
|
|
{
|
|
if (myCharUtf32 <= UTF32_MAX_BMP) // target is a character <= 0xFFFF
|
|
{
|
|
// UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values
|
|
if (myCharUtf32 >= UTF16_SURROGATE_HIGH_START
|
|
&& myCharUtf32 <= UTF16_SURROGATE_LOW_END)
|
|
{
|
|
return 0;
|
|
}
|
|
else
|
|
{
|
|
return Standard_Integer(sizeof(Standard_Utf16Char));
|
|
}
|
|
}
|
|
else if (myCharUtf32 > UTF32_MAX_LEGAL)
|
|
{
|
|
// illegal
|
|
return 0;
|
|
}
|
|
else
|
|
{
|
|
// target is a character in range 0xFFFF - 0x10FFFF
|
|
// surrogate pair
|
|
return Standard_Integer(sizeof(Standard_Utf16Char) * 2);
|
|
}
|
|
}
|
|
|
|
// =======================================================================
|
|
// function : GetUtf16
|
|
// purpose :
|
|
// =======================================================================
|
|
template<typename Type> inline
|
|
Standard_Utf16Char* NCollection_UtfIterator<Type>::GetUtf16 (Standard_Utf16Char* theBuffer) const
|
|
{
|
|
if (myCharUtf32 <= UTF32_MAX_BMP) // target is a character <= 0xFFFF
|
|
{
|
|
// UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values
|
|
if (myCharUtf32 >= UTF16_SURROGATE_HIGH_START
|
|
&& myCharUtf32 <= UTF16_SURROGATE_LOW_END)
|
|
{
|
|
return theBuffer;
|
|
}
|
|
else
|
|
{
|
|
*theBuffer++ = Standard_Utf16Char(myCharUtf32);
|
|
return theBuffer;
|
|
}
|
|
}
|
|
else if (myCharUtf32 > UTF32_MAX_LEGAL)
|
|
{
|
|
// illegal
|
|
return theBuffer;
|
|
}
|
|
else
|
|
{
|
|
// surrogate pair
|
|
Standard_Utf32Char aChar = myCharUtf32 - UTF16_SURROGATE_LOW_BASE;
|
|
*theBuffer++ = Standard_Utf16Char((aChar >> UTF16_SURROGATE_HIGH_SHIFT) + UTF16_SURROGATE_HIGH_START);
|
|
*theBuffer++ = Standard_Utf16Char((aChar & UTF16_SURROGATE_LOW_MASK) + UTF16_SURROGATE_LOW_START);
|
|
return theBuffer;
|
|
}
|
|
}
|
|
|
|
// =======================================================================
|
|
// function : GetUtf32
|
|
// purpose :
|
|
// =======================================================================
|
|
template<typename Type> inline
|
|
Standard_Utf32Char* NCollection_UtfIterator<Type>::GetUtf32 (Standard_Utf32Char* theBuffer) const
|
|
{
|
|
*theBuffer++ = myCharUtf32;
|
|
return theBuffer;
|
|
}
|
|
|
|
// =======================================================================
|
|
// function : AdvanceBytesUtf
|
|
// purpose :
|
|
// =======================================================================
|
|
template<typename Type> template<typename TypeWrite> inline
|
|
Standard_Integer NCollection_UtfIterator<Type>::AdvanceBytesUtf() const
|
|
{
|
|
switch (sizeof(TypeWrite))
|
|
{
|
|
case sizeof(Standard_Utf8Char): return AdvanceBytesUtf8();
|
|
case sizeof(Standard_Utf16Char): return AdvanceBytesUtf16();
|
|
case sizeof(Standard_Utf32Char): return AdvanceBytesUtf32();
|
|
default: return 0; // invalid case
|
|
}
|
|
}
|
|
|
|
// =======================================================================
|
|
// function : GetUtf
|
|
// purpose :
|
|
// =======================================================================
|
|
template<typename Type> template<typename TypeWrite> inline
|
|
TypeWrite* NCollection_UtfIterator<Type>::GetUtf (TypeWrite* theBuffer) const
|
|
{
|
|
switch (sizeof(TypeWrite))
|
|
{
|
|
case sizeof(Standard_Utf8Char): return (TypeWrite* )GetUtf8 ((Standard_Utf8UChar* )theBuffer);
|
|
case sizeof(Standard_Utf16Char): return (TypeWrite* )GetUtf16((Standard_Utf16Char* )theBuffer);
|
|
case sizeof(Standard_Utf32Char): return (TypeWrite* )GetUtf32((Standard_Utf32Char* )theBuffer);
|
|
default: return NULL; // invalid case
|
|
}
|
|
}
|