1
0
mirror of https://git.dev.opencascade.org/repos/occt.git synced 2025-04-03 17:56:21 +03:00

0028454: Data Exchange, STEP reader - names with special characters cannot be read

- Add support for the control directives ("\X2\", "\X4\", "\X\", "\P*\", "\S\");
- Make the "read.stepcaf.codepage" parameter the basis for conversion inside StepData instead of CAF;
- Rename "read.stepcaf.codepage" to "read.step.codepage";
- Add the ISO 8859-1 to ISO 8859-9 code pages for conversion;
- Add the Resource_FormatType_NoConversion format type, which indicates non-conversion behavior;
- Update old test cases that contain control directives.
This commit is contained in:
dpasukhi 2020-10-09 13:57:30 +03:00 committed by bugmaster
parent 380748c340
commit 1b9cb073b9
22 changed files with 949 additions and 89 deletions

View File

@ -1,5 +1,5 @@
Resource_ANSI.pxx
Resource_Big5.pxx
Resource_CodePages.pxx
Resource_ConvertUnicode.c
Resource_ConvertUnicode.hxx
Resource_DataMapOfAsciiStringAsciiString.hxx

View File

@ -14,7 +14,7 @@
#include <Standard_TypeDef.hxx>
// Code pages ANSI -> UTF16
static const Standard_ExtCharacter THE_CODEPAGES_ANSI[9][128] =
static const Standard_ExtCharacter THE_CODEPAGES_ANSI[Resource_FormatType_iso8859_9 - Resource_FormatType_CP1250 + 1][128] =
{
{
// code page: cp1250
@ -338,5 +338,329 @@ static const Standard_ExtCharacter THE_CODEPAGES_ANSI[9][128] =
0xf4, 0x1a1, 0xf6, 0xf7,
0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0x1b0, 0x20ab, 0xff
},
{
// code page: ISO 8859-1
0x80, 0x81, 0x82, 0x83,
0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b,
0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93,
0x94, 0x95, 0x96, 0x97,
0x98, 0x99, 0x9a, 0x9b,
0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0xa1, 0xa2, 0xa3,
0xa4, 0xa5, 0xa6, 0xa7,
0xa8, 0xa9, 0xaa, 0xab,
0xac, 0xad, 0xae, 0xaf,
0xb0, 0xb1, 0xb2, 0xb3,
0xb4, 0xb5, 0xb6, 0xb7,
0xb8, 0xb9, 0xba, 0xbb,
0xbc, 0xbd, 0xbe, 0xbf,
0xc0, 0xc1, 0xc2, 0xc3,
0xc4, 0xc5, 0xc6, 0xc7,
0xc8, 0xc9, 0xca, 0xcb,
0xcc, 0xcd, 0xce, 0xcf,
0xd0, 0xd1, 0xd2, 0xd3,
0xd4, 0xd5, 0xd6, 0xd7,
0xd8, 0xd9, 0xda, 0xdb,
0xdc, 0xdd, 0xde, 0xdf,
0xe0, 0xe1, 0xe2, 0xe3,
0xe4, 0xe5, 0xe6, 0xe7,
0xe8, 0xe9, 0xea, 0xeb,
0xec, 0xed, 0xee, 0xef,
0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7,
0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff
},
{
// code page: ISO 8859-2
0x80, 0x81, 0x82, 0x83,
0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b,
0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93,
0x94, 0x95, 0x96, 0x97,
0x98, 0x99, 0x9a, 0x9b,
0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0x104, 0x2d8, 0x141,
0xa4, 0x13d, 0x15a, 0xa7,
0xa8, 0x160, 0x15e, 0x164,
0x179, 0xad, 0x17d, 0x17b,
0xb0, 0x105, 0x2db, 0x142,
0xb4, 0x13e, 0x15b, 0x2c7,
0xb8, 0x161, 0x15f, 0x165,
0x17a, 0x2dd, 0x17e, 0x17c,
0x154, 0xc1, 0xc2, 0x102,
0xc4, 0x139, 0x106, 0xc7,
0x10c, 0xc9, 0x118, 0xcb,
0x11a, 0xcd, 0xce, 0x10e,
0x110, 0x143, 0x147, 0xd3,
0xd4, 0x150, 0xd6, 0xd7,
0x158, 0x16e, 0xda, 0x170,
0xdc, 0xdd, 0x162, 0xdf,
0x155, 0xe1, 0xe2, 0x103,
0xe4, 0x13a, 0x107, 0xe7,
0x10d, 0xe9, 0x119, 0xeb,
0x11b, 0xed, 0xee, 0x10f,
0x111, 0x144, 0x148, 0xf3,
0xf4, 0x151, 0xf6, 0xf7,
0x159, 0x16f, 0xfa, 0x171,
0xfc, 0xfd, 0x163, 0x2d9
},
{
// code page: ISO 8859-3
0x80, 0x81, 0x82, 0x83,
0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b,
0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93,
0x94, 0x95, 0x96, 0x97,
0x98, 0x99, 0x9a, 0x9b,
0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0x126, 0x2d8, 0xa3,
0xa4, 0x0, 0x124, 0xa7,
0xa8, 0x130, 0x15e, 0x11e,
0x134, 0xad, 0x0, 0x17b,
0xb0, 0x127, 0xb2, 0xb3,
0xb4, 0xb5, 0x125, 0xb7,
0xb8, 0x131, 0x15f, 0x11f,
0x135, 0xbd, 0x0, 0x17c,
0xc0, 0xc1, 0xc2, 0x0,
0xc4, 0x10a, 0x108, 0xc7,
0xc8, 0xc9, 0xca, 0xcb,
0xcc, 0xcd, 0xce, 0xcf,
0x0, 0xd1, 0xd2, 0xd3,
0xd4, 0x120, 0xd6, 0xd7,
0x11c, 0xd9, 0xda, 0xdb,
0xdc, 0x16c, 0x15c, 0xdf,
0xe0, 0xe1, 0xe2, 0x0,
0xe4, 0x10b, 0x109, 0xe7,
0xe8, 0xe9, 0xea, 0xeb,
0xec, 0xed, 0xee, 0xef,
0x0, 0xf1, 0xf2, 0xf3,
0xf4, 0x121, 0xf6, 0xf7,
0x11d, 0xf9, 0xfa, 0xfb,
0xfc, 0x16d, 0x15d, 0x2d9
},
{
// code page: ISO 8859-4
0x80, 0x81, 0x82, 0x83,
0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b,
0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93,
0x94, 0x95, 0x96, 0x97,
0x98, 0x99, 0x9a, 0x9b,
0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0x104, 0x138, 0x156,
0xa4, 0x128, 0x13b, 0xa7,
0xa8, 0x160, 0x112, 0x122,
0x166, 0xad, 0x17d, 0xaf,
0xb0, 0x105, 0x2db, 0x157,
0xb4, 0x129, 0x13c, 0x2c7,
0xb8, 0x161, 0x113, 0x123,
0x167, 0x14a, 0x17e, 0x14b,
0x100, 0xc1, 0xc2, 0xc3,
0xc4, 0xc5, 0xc6, 0x12e,
0x10c, 0xc9, 0x118, 0xcb,
0x116, 0xcd, 0xce, 0x12a,
0x110, 0x145, 0x14c, 0x136,
0xd4, 0xd5, 0xd6, 0xd7,
0xd8, 0x172, 0xda, 0xdb,
0xdc, 0x168, 0x16a, 0xdf,
0x101, 0xe1, 0xe2, 0xe3,
0xe4, 0xe5, 0xe6, 0x12f,
0x10d, 0xe9, 0x119, 0xeb,
0x117, 0xed, 0xee, 0x12b,
0x111, 0x146, 0x14d, 0x137,
0xf4, 0xf5, 0xf6, 0xf7,
0xf8, 0x173, 0xfa, 0xfb,
0xfc, 0x169, 0x16b, 0x2d9
},
{
// code page: ISO 8859-5
0x80, 0x81, 0x82, 0x83,
0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b,
0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93,
0x94, 0x95, 0x96, 0x97,
0x98, 0x99, 0x9a, 0x9b,
0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0x401, 0x402, 0x403,
0x404, 0x405, 0x406, 0x407,
0x408, 0x409, 0x40a, 0x40b,
0x40c, 0xad, 0x40e, 0x40f,
0x410, 0x411, 0x412, 0x413,
0x414, 0x415, 0x416, 0x417,
0x418, 0x419, 0x41a, 0x41b,
0x41c, 0x41d, 0x41e, 0x41f,
0x420, 0x421, 0x422, 0x423,
0x424, 0x425, 0x426, 0x427,
0x428, 0x429, 0x42a, 0x42b,
0x42c, 0x42d, 0x42e, 0x42f,
0x430, 0x431, 0x432, 0x433,
0x434, 0x435, 0x436, 0x437,
0x438, 0x439, 0x43a, 0x43b,
0x43c, 0x43d, 0x43e, 0x43f,
0x440, 0x441, 0x442, 0x443,
0x444, 0x445, 0x446, 0x447,
0x448, 0x449, 0x44a, 0x44b,
0x44c, 0x44d, 0x44e, 0x44f,
0x2116, 0x451, 0x452, 0x453,
0x454, 0x455, 0x456, 0x457,
0x458, 0x459, 0x45a, 0x45b,
0x45c, 0xa7, 0x45e, 0x45f
},
{
// code page: ISO 8859-6
0x80, 0x81, 0x82, 0x83,
0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b,
0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93,
0x94, 0x95, 0x96, 0x97,
0x98, 0x99, 0x9a, 0x9b,
0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0x0, 0x0, 0x0,
0xa4, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x60c, 0xad, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x61b,
0x0, 0x0, 0x0, 0x61f,
0x0, 0x621, 0x622, 0x623,
0x624, 0x625, 0x626, 0x627,
0x628, 0x629, 0x62a, 0x62b,
0x62c, 0x62d, 0x62e, 0x62f,
0x630, 0x631, 0x632, 0x633,
0x634, 0x635, 0x636, 0x637,
0x638, 0x639, 0x63a, 0x0,
0x0, 0x0, 0x0, 0x0,
0x640, 0x641, 0x642, 0x643,
0x644, 0x645, 0x646, 0x647,
0x648, 0x649, 0x64a, 0x64b,
0x64c, 0x64d, 0x64e, 0x64f,
0x650, 0x651, 0x652, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0
},
{
// code page: ISO 8859-7
0x80, 0x81, 0x82, 0x83,
0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b,
0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93,
0x94, 0x95, 0x96, 0x97,
0x98, 0x99, 0x9a, 0x9b,
0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0x2018, 0x2019, 0xa3,
0x0, 0x0, 0xa6, 0xa7,
0xa8, 0xa9, 0x0, 0xab,
0xac, 0xad, 0x0, 0x2015,
0xb0, 0xb1, 0xb2, 0xb3,
0x384, 0x385, 0x386, 0xb7,
0x388, 0x389, 0x38a, 0xbb,
0x38c, 0xbd, 0x38e, 0x38f,
0x390, 0x391, 0x392, 0x393,
0x394, 0x395, 0x396, 0x397,
0x398, 0x399, 0x39a, 0x39b,
0x39c, 0x39d, 0x39e, 0x39f,
0x3a0, 0x3a1, 0x0, 0x3a3,
0x3a4, 0x3a5, 0x3a6, 0x3a7,
0x3a8, 0x3a9, 0x3aa, 0x3ab,
0x3ac, 0x3ad, 0x3ae, 0x3af,
0x3b0, 0x3b1, 0x3b2, 0x3b3,
0x3b4, 0x3b5, 0x3b6, 0x3b7,
0x3b8, 0x3b9, 0x3ba, 0x3bb,
0x3bc, 0x3bd, 0x3be, 0x3bf,
0x3c0, 0x3c1, 0x3c2, 0x3c3,
0x3c4, 0x3c5, 0x3c6, 0x3c7,
0x3c8, 0x3c9, 0x3ca, 0x3cb,
0x3cc, 0x3cd, 0x3ce, 0x0
},
{
// code page: ISO 8859-8
0x80, 0x81, 0x82, 0x83,
0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b,
0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93,
0x94, 0x95, 0x96, 0x97,
0x98, 0x99, 0x9a, 0x9b,
0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0x0, 0xa2, 0xa3,
0xa4, 0xa5, 0xa6, 0xa7,
0xa8, 0xa9, 0xd7, 0xab,
0xac, 0xad, 0xae, 0x203e,
0xb0, 0xb1, 0xb2, 0xb3,
0xb4, 0xb5, 0xb6, 0xb7,
0xb8, 0xb9, 0xf7, 0xbb,
0xbc, 0xbd, 0xbe, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x2017,
0x5d0, 0x5d1, 0x5d2, 0x5d3,
0x5d4, 0x5d5, 0x5d6, 0x5d7,
0x5d8, 0x5d9, 0x5da, 0x5db,
0x5dc, 0x5dd, 0x5de, 0x5df,
0x5e0, 0x5e1, 0x5e2, 0x5e3,
0x5e4, 0x5e5, 0x5e6, 0x5e7,
0x5e8, 0x5e9, 0x5ea, 0x0,
0x0, 0x0, 0x0, 0x0
},
{
// code page: ISO 8859-9
0x80, 0x81, 0x82, 0x83,
0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b,
0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93,
0x94, 0x95, 0x96, 0x97,
0x98, 0x99, 0x9a, 0x9b,
0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0xa1, 0xa2, 0xa3,
0xa4, 0xa5, 0xa6, 0xa7,
0xa8, 0xa9, 0xaa, 0xab,
0xac, 0xad, 0xae, 0xaf,
0xb0, 0xb1, 0xb2, 0xb3,
0xb4, 0xb5, 0xb6, 0xb7,
0xb8, 0xb9, 0xba, 0xbb,
0xbc, 0xbd, 0xbe, 0xbf,
0xc0, 0xc1, 0xc2, 0xc3,
0xc4, 0xc5, 0xc6, 0xc7,
0xc8, 0xc9, 0xca, 0xcb,
0xcc, 0xcd, 0xce, 0xcf,
0x11e, 0xd1, 0xd2, 0xd3,
0xd4, 0xd5, 0xd6, 0xd7,
0xd8, 0xd9, 0xda, 0xdb,
0xdc, 0x130, 0x15e, 0xdf,
0xe0, 0xe1, 0xe2, 0xe3,
0xe4, 0xe5, 0xe6, 0xe7,
0xe8, 0xe9, 0xea, 0xeb,
0xec, 0xed, 0xee, 0xef,
0x11f, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7,
0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0x131, 0x15f, 0xff
}
};

View File

@ -24,12 +24,12 @@ enum Resource_FormatType
{
Resource_FormatType_SJIS, //!< SJIS (Shift Japanese Industrial Standards) encoding
Resource_FormatType_EUC, //!< EUC (Extended Unix Code) multi-byte encoding primarily for Japanese, Korean, and simplified Chinese
Resource_FormatType_ANSI, //!< ANSI encoding (pass through without conversion)
Resource_FormatType_NoConversion, //!< format type indicating non-conversion behavior
Resource_FormatType_GB, //!< GB (Guobiao) encoding for Simplified Chinese
Resource_FormatType_UTF8, //!< multi-byte UTF-8 encoding
Resource_FormatType_SystemLocale, //!< active system-defined locale; this value is strongly NOT recommended to use
// non ASCII format types
// Windows-native ("ANSI") 8-bit code pages
Resource_FormatType_CP1250, //!< cp1250 (Central European) encoding
Resource_FormatType_CP1251, //!< cp1251 (Cyrillic) encoding
Resource_FormatType_CP1252, //!< cp1252 (Western European) encoding
@ -40,7 +40,19 @@ enum Resource_FormatType
Resource_FormatType_CP1257, //!< cp1257 (Baltic) encoding
Resource_FormatType_CP1258, //!< cp1258 (Vietnamese) encoding
// ISO8859 8-bit code pages
Resource_FormatType_iso8859_1, //!< ISO 8859-1 (Western European) encoding
Resource_FormatType_iso8859_2, //!< ISO 8859-2 (Central European) encoding
Resource_FormatType_iso8859_3, //!< ISO 8859-3 (Turkish) encoding
Resource_FormatType_iso8859_4, //!< ISO 8859-4 (Northern European) encoding
Resource_FormatType_iso8859_5, //!< ISO 8859-5 (Cyrillic) encoding
Resource_FormatType_iso8859_6, //!< ISO 8859-6 (Arabic) encoding
Resource_FormatType_iso8859_7, //!< ISO 8859-7 (Greek) encoding
Resource_FormatType_iso8859_8, //!< ISO 8859-8 (Hebrew) encoding
Resource_FormatType_iso8859_9, //!< ISO 8859-9 (Turkish) encoding
// old aliases
Resource_FormatType_ANSI = Resource_FormatType_NoConversion,
Resource_SJIS = Resource_FormatType_SJIS,
Resource_EUC = Resource_FormatType_EUC,
Resource_ANSI = Resource_FormatType_ANSI,

View File

@ -22,7 +22,7 @@
#include <TCollection_ExtendedString.hxx>
#include <NCollection_UtfString.hxx>
#include <Standard_NotImplemented.hxx>
#include "Resource_ANSI.pxx"
#include "Resource_CodePages.pxx"
#include "Resource_GBK.pxx"
#include "Resource_Big5.pxx"
@ -625,20 +625,30 @@ void Resource_Unicode::ConvertFormatToUnicode (const Resource_FormatType theForm
case Resource_FormatType_CP1256:
case Resource_FormatType_CP1257:
case Resource_FormatType_CP1258:
case Resource_FormatType_iso8859_1:
case Resource_FormatType_iso8859_2:
case Resource_FormatType_iso8859_3:
case Resource_FormatType_iso8859_4:
case Resource_FormatType_iso8859_5:
case Resource_FormatType_iso8859_6:
case Resource_FormatType_iso8859_7:
case Resource_FormatType_iso8859_8:
case Resource_FormatType_iso8859_9:
{
const int aCodePageIndex = (int)theFormat - (int)Resource_FormatType_CP1250;
const Standard_ExtString aCodePage = THE_CODEPAGES_ANSI[aCodePageIndex];
theToStr.Clear();
for (const char* anInputPntr = theFromStr; *anInputPntr != '\0'; ++anInputPntr)
{
Standard_ExtCharacter aRes = (*anInputPntr & 0x80) != 0
? aCodePage[(0x7f & *anInputPntr)]
: *anInputPntr;
if (aRes == (Standard_ExtCharacter)0x0)
unsigned char anInputChar = (unsigned char)(*anInputPntr);
Standard_ExtCharacter aRes = (anInputChar & 0x80) != 0
? aCodePage[(0x7f & anInputChar)]
: anInputChar;
if (aRes == 0)
{
aRes = '?';
}
theToStr.Insert(theToStr.Length() + 1, aRes);
theToStr.AssignCat(aRes);
}
break;
}
@ -689,8 +699,52 @@ Standard_Boolean Resource_Unicode::ConvertUnicodeToFormat(const Resource_FormatT
case Resource_FormatType_CP1256:
case Resource_FormatType_CP1257:
case Resource_FormatType_CP1258:
case Resource_FormatType_iso8859_1:
case Resource_FormatType_iso8859_2:
case Resource_FormatType_iso8859_3:
case Resource_FormatType_iso8859_4:
case Resource_FormatType_iso8859_5:
case Resource_FormatType_iso8859_6:
case Resource_FormatType_iso8859_7:
case Resource_FormatType_iso8859_8:
case Resource_FormatType_iso8859_9:
{
throw Standard_NotImplemented("Resource_Unicode::ConvertUnicodeToFormat - conversion from CP1250 - CP1258 to Unicode is not implemented");
if (theMaxSize < theFromStr.Length())
{
return Standard_False;
}
const int aCodePageIndex = (int)theFormat - (int)Resource_FormatType_CP1250;
const Standard_ExtString aCodePage = THE_CODEPAGES_ANSI[aCodePageIndex];
for (Standard_Integer aToCharInd = 0; aToCharInd < theMaxSize - 1; ++aToCharInd)
{
Standard_Boolean isFind = Standard_False;
Standard_ExtCharacter aFromChar = theFromStr.Value(aToCharInd + 1);
if (aFromChar == 0)
{
// zero value should be handled explicitly to avoid false conversion by
// selected code page that may have unused values (encoded as zero)
theToStr[aToCharInd] = '\0';
}
else
{
// find the character in the code page
for (unsigned char anIndCP = 0; aFromChar != 0 && anIndCP < 128; ++anIndCP)
{
if (aCodePage[anIndCP] == aFromChar)
{
theToStr[aToCharInd] = anIndCP | 0x80;
isFind = Standard_True;
}
}
// if character is not found, put '?'
if (!isFind)
{
theToStr[aToCharInd] = '?';
}
}
}
theToStr[theMaxSize - 1] = '\0';
return Standard_True;
}
case Resource_FormatType_UTF8:
{

View File

@ -69,26 +69,5 @@ Standard_Boolean STEPCAFControl_Controller::Init ()
Interface_Static::Init ("stepcaf", "read.stepcaf.subshapes.name", '&', "eval On"); // 1
Interface_Static::SetIVal("read.stepcaf.subshapes.name", 0); // Disabled by default
// STEP file encoding for names translation
// Note: the numbers should be consistent with Resource_FormatType enumeration
Interface_Static::Init ("step", "read.stepcaf.codepage", 'e', "");
Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "enum 0");
Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval SJIS"); // Resource_FormatType_SJIS
Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval EUC"); // Resource_FormatType_EUC
Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval ANSI"); // Resource_FormatType_ANSI
Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval GB"); // Resource_FormatType_GB
Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval UTF8"); // Resource_FormatType_UTF8
Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval SystemLocale"); // Resource_FormatType_SystemLocale
Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1250"); // Resource_FormatType_CP1250
Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1251"); // Resource_FormatType_CP1251
Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1252"); // Resource_FormatType_CP1252
Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1253"); // Resource_FormatType_CP1253
Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1254"); // Resource_FormatType_CP1254
Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1255"); // Resource_FormatType_CP1255
Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1256"); // Resource_FormatType_CP1256
Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1257"); // Resource_FormatType_CP1257
Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1258"); // Resource_FormatType_CP1258
Interface_Static::SetCVal ("read.stepcaf.codepage", "UTF8");
return Standard_True;
}

View File

@ -281,7 +281,6 @@
#include <Transfer_ActorOfTransientProcess.hxx>
#include <Bnd_Box.hxx>
#include <BRepBndLib.hxx>
#include <Resource_Unicode.hxx>
// skl 21.08.2003 for reading G&DT
//#include <StepRepr_CompoundItemDefinition.hxx>
@ -321,8 +320,7 @@ TCollection_AsciiString AddrToString(const TopoDS_Shape& theShape)
//=======================================================================
STEPCAFControl_Reader::STEPCAFControl_Reader()
: mySourceCodePage (Resource_FormatType_UTF8),
myColorMode(Standard_True),
: myColorMode(Standard_True),
myNameMode(Standard_True),
myLayerMode(Standard_True),
myPropsMode(Standard_True),
@ -332,7 +330,6 @@ STEPCAFControl_Reader::STEPCAFControl_Reader()
myViewMode(Standard_True)
{
STEPCAFControl_Controller::Init();
mySourceCodePage = (Resource_FormatType )Interface_Static::IVal ("read.stepcaf.codepage");
}
@ -343,8 +340,7 @@ STEPCAFControl_Reader::STEPCAFControl_Reader()
STEPCAFControl_Reader::STEPCAFControl_Reader(const Handle(XSControl_WorkSession)& WS,
const Standard_Boolean scratch)
: mySourceCodePage (Resource_FormatType_UTF8),
myColorMode(Standard_True),
: myColorMode(Standard_True),
myNameMode(Standard_True),
myLayerMode(Standard_True),
myPropsMode(Standard_True),
@ -354,7 +350,6 @@ STEPCAFControl_Reader::STEPCAFControl_Reader(const Handle(XSControl_WorkSession)
myViewMode(Standard_True)
{
STEPCAFControl_Controller::Init();
mySourceCodePage = (Resource_FormatType )Interface_Static::IVal ("read.stepcaf.codepage");
Init(WS, scratch);
}
@ -386,9 +381,10 @@ void STEPCAFControl_Reader::Init(const Handle(XSControl_WorkSession)& WS,
//=======================================================================
TCollection_ExtendedString STEPCAFControl_Reader::convertName (const TCollection_AsciiString& theName) const
{
TCollection_ExtendedString aName;
Resource_Unicode::ConvertFormatToUnicode (mySourceCodePage, theName.ToCString(), aName);
return aName;
// If source code page is not a NoConversion
// the string is treated as having UTF-8 coding,
// else each character is copied to ExtCharacter.
return TCollection_ExtendedString (theName, SourceCodePage() != Resource_FormatType_NoConversion);
}
//=======================================================================
@ -4787,6 +4783,26 @@ Standard_Boolean STEPCAFControl_Reader::GetNameMode() const
return myNameMode;
}
//=======================================================================
//function : SourceCodePage
//purpose  : Returns the source code page reported by the STEP model of
//           the wrapped reader (myReader); this method only forwards
//           the call to that model.
//           NOTE(review): dereferences myReader.StepModel() without a
//           null check - confirm a model always exists at this point.
//=======================================================================
Resource_FormatType STEPCAFControl_Reader::SourceCodePage() const
{
return myReader.StepModel()->SourceCodePage();
}
//=======================================================================
//function : SetSourceCodePage
//purpose  : Sets the source code page on the STEP model of the wrapped
//           reader (myReader); theCode is forwarded unchanged.
//           NOTE(review): dereferences myReader.StepModel() without a
//           null check - confirm a model always exists at this point.
//=======================================================================
void STEPCAFControl_Reader::SetSourceCodePage(Resource_FormatType theCode)
{
myReader.StepModel()->SetSourceCodePage(theCode);
}
//=======================================================================
//function : SetLayerMode
//purpose :

View File

@ -142,11 +142,11 @@ public:
Standard_EXPORT Standard_Boolean GetNameMode() const;
//! Return the encoding of STEP file for converting names into UNICODE.
//! Initialized from "read.stepcaf.codepage" variable by constructor, which is Resource_UTF8 by default.
Resource_FormatType SourceCodePage() const { return mySourceCodePage; }
//! Initialized from "read.step.codepage" variable by constructor, which is Resource_UTF8 by default.
Standard_EXPORT Resource_FormatType SourceCodePage() const;
//! Set the encoding of STEP file for converting names into UNICODE.
void SetSourceCodePage (Resource_FormatType theCode) { mySourceCodePage = theCode; }
Standard_EXPORT void SetSourceCodePage (Resource_FormatType theCode);
//! Set LayerMode for indicate read Layers or not.
Standard_EXPORT void SetLayerMode (const Standard_Boolean layermode);
@ -301,7 +301,6 @@ private:
STEPControl_Reader myReader;
NCollection_DataMap<TCollection_AsciiString, Handle(STEPCAFControl_ExternFile)> myFiles;
Resource_FormatType mySourceCodePage;
Standard_Boolean myColorMode;
Standard_Boolean myNameMode;
Standard_Boolean myLayerMode;

View File

@ -217,6 +217,38 @@ STEPControl_Controller::STEPControl_Controller ()
Interface_Static::Init("step", "read.step.root.transformation", '&', "eval OFF");
Interface_Static::SetCVal("read.step.root.transformation", "ON");
// STEP file encoding for names translation
// Note: the numbers should be consistent with Resource_FormatType enumeration
Interface_Static::Init("step", "read.step.codepage", 'e', "");
Interface_Static::Init("step", "read.step.codepage", '&', "enum 0");
Interface_Static::Init("step", "read.step.codepage", '&', "eval SJIS"); // Resource_FormatType_SJIS
Interface_Static::Init("step", "read.step.codepage", '&', "eval EUC"); // Resource_FormatType_EUC
Interface_Static::Init("step", "read.step.codepage", '&', "eval NoConversion"); // Resource_FormatType_NoConversion
Interface_Static::Init("step", "read.step.codepage", '&', "eval GB"); // Resource_FormatType_GB
Interface_Static::Init("step", "read.step.codepage", '&', "eval UTF8"); // Resource_FormatType_UTF8
Interface_Static::Init("step", "read.step.codepage", '&', "eval SystemLocale"); // Resource_FormatType_SystemLocale
Interface_Static::Init("step", "read.step.codepage", '&', "eval CP1250"); // Resource_FormatType_CP1250
Interface_Static::Init("step", "read.step.codepage", '&', "eval CP1251"); // Resource_FormatType_CP1251
Interface_Static::Init("step", "read.step.codepage", '&', "eval CP1252"); // Resource_FormatType_CP1252
Interface_Static::Init("step", "read.step.codepage", '&', "eval CP1253"); // Resource_FormatType_CP1253
Interface_Static::Init("step", "read.step.codepage", '&', "eval CP1254"); // Resource_FormatType_CP1254
Interface_Static::Init("step", "read.step.codepage", '&', "eval CP1255"); // Resource_FormatType_CP1255
Interface_Static::Init("step", "read.step.codepage", '&', "eval CP1256"); // Resource_FormatType_CP1256
Interface_Static::Init("step", "read.step.codepage", '&', "eval CP1257"); // Resource_FormatType_CP1257
Interface_Static::Init("step", "read.step.codepage", '&', "eval CP1258"); // Resource_FormatType_CP1258
Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-1"); // Resource_FormatType_iso8859_1
Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-2"); // Resource_FormatType_iso8859_2
Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-3"); // Resource_FormatType_iso8859_3
Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-4"); // Resource_FormatType_iso8859_4
Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-5"); // Resource_FormatType_iso8859_5
Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-6"); // Resource_FormatType_iso8859_6
Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-7"); // Resource_FormatType_iso8859_7
Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-8"); // Resource_FormatType_iso8859_8
Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-9"); // Resource_FormatType_iso8859_9
Interface_Static::SetCVal("read.step.codepage", "UTF8");
Standard_STATIC_ASSERT((int)Resource_FormatType_iso8859_9 - (int)Resource_FormatType_CP1250 == 17); // "Error: Invalid Codepage Enumeration"
init = Standard_True;
}

View File

@ -28,12 +28,14 @@
#include <StepData_StepModel.hxx>
#include <StepData_StepWriter.hxx>
#include <TCollection_HAsciiString.hxx>
#include <Interface_Static.hxx>
#include <stdio.h>
IMPLEMENT_STANDARD_RTTIEXT(StepData_StepModel,Interface_InterfaceModel)
// Entete de fichier : liste d entites
StepData_StepModel::StepData_StepModel () { }
StepData_StepModel::StepData_StepModel () :mySourceCodePage((Resource_FormatType)Interface_Static::IVal("read.step.codepage"))
{}
Handle(Standard_Transient) StepData_StepModel::Entity

View File

@ -20,6 +20,7 @@
#include <Interface_EntityList.hxx>
#include <TColStd_HArray1OfInteger.hxx>
#include <Interface_InterfaceModel.hxx>
#include <Resource_FormatType.hxx>
class Standard_NoSuchObject;
class Standard_Transient;
@ -97,7 +98,12 @@ public:
//! same form as for PrintLabel
Standard_EXPORT Handle(TCollection_HAsciiString) StringLabel (const Handle(Standard_Transient)& ent) const Standard_OVERRIDE;
//! Return the encoding of STEP file for converting names into UNICODE.
//! Initialized by the constructor from the "read.step.codepage" static parameter
//! (STEPControl_Controller sets that parameter to "UTF8" by default).
Resource_FormatType SourceCodePage() const { return mySourceCodePage; }
//! Set the encoding of STEP file for converting names into UNICODE.
//! @param theCode [in] code page to store in this model
void SetSourceCodePage (Resource_FormatType theCode) { mySourceCodePage = theCode; }
DEFINE_STANDARD_RTTIEXT(StepData_StepModel,Interface_InterfaceModel)
@ -112,6 +118,7 @@ private:
Interface_EntityList theheader;
Handle(TColStd_HArray1OfInteger) theidnums;
Resource_FormatType mySourceCodePage;
};

View File

@ -18,6 +18,7 @@
#include <Interface_FileParameter.hxx>
#include <Interface_HArray1OfHAsciiString.hxx>
#include <Interface_Macros.hxx>
#include <Interface_Static.hxx>
#include <Interface_ParamList.hxx>
#include <Message.hxx>
#include <Message_Messenger.hxx>
@ -37,6 +38,8 @@
#include <StepData_StepModel.hxx>
#include <StepData_StepReaderData.hxx>
#include <TCollection_AsciiString.hxx>
#include <TCollection_ExtendedString.hxx>
#include <NCollection_UtfIterator.hxx>
#include <TCollection_HAsciiString.hxx>
#include <TColStd_Array1OfInteger.hxx>
#include <TColStd_HArray1OfInteger.hxx>
@ -46,6 +49,7 @@
#include <TColStd_IndexedMapOfInteger.hxx>
#include <TColStd_SequenceOfInteger.hxx>
#include <StepData_UndefinedEntity.hxx>
#include <Resource_Unicode.hxx>
#include <stdio.h>
IMPLEMENT_STANDARD_RTTIEXT(StepData_StepReaderData, Interface_FileReaderData)
@ -61,7 +65,6 @@ static char txtmes[200]; // plus commode que redeclarer partout
static Standard_Boolean initstr = Standard_False;
#define Maxlst 64
//static TCollection_AsciiString subl[Maxlst]; // Maxlst : minimum 10
@ -69,36 +72,231 @@ static Standard_Integer acceptvoid = 0;
// ---------- Fonctions Utilitaires ----------
//! Convert a single hexadecimal digit character to its numeric value.
//! Both upper-case ('A'-'F') and lower-case ('a'-'f') letters are accepted,
//! so callers that do not upper-case their input beforehand decode the same
//! way as callers that do.
//! @param theCharacter [in] character expected to be a hexadecimal digit
//! @return value in the range 0..15; 0 is also returned when the character
//!         is not a hexadecimal digit, so an invalid character cannot be
//!         distinguished from '0' by the caller
static Standard_Integer convertCharacterTo16bit(const unsigned char theCharacter)
{
  if (theCharacter >= '0' && theCharacter <= '9')
  {
    return theCharacter - '0';
  }
  if (theCharacter >= 'A' && theCharacter <= 'F')
  {
    return theCharacter - 'A' + 10;
  }
  if (theCharacter >= 'a' && theCharacter <= 'f')
  {
    return theCharacter - 'a' + 10;
  }
  // not a hexadecimal digit: keep the original fallback value
  return 0;
}
//=======================================================================
//function : CleanText
//function : cleanText
//purpose :
//=======================================================================
static void CleanText(const Handle(TCollection_HAsciiString)& val)
void StepData_StepReaderData::cleanText(const Handle(TCollection_HAsciiString)& theVal) const
{
Standard_Integer n = val->Length(); // avant reduction
val->Remove(n);
val->Remove(1);
// Ne pas oublier de traiter les caracteres speciaux
Standard_Integer n = theVal->Length(); // string size before reduction
theVal->Remove(n);
theVal->Remove(1);
// Don't forget to treat the special characters
for (Standard_Integer i = n - 2; i > 0; i--) {
char uncar = val->Value(i);
if (uncar == '\n')
{ val->Remove(i); if (i < n-2) uncar = val->Value(i); }
if (uncar == '\'' && i < n - 2) {
if (val->Value(i + 1) == '\'') { val->Remove(i + 1); continue; }
char aChar = theVal->Value(i);
if (aChar == '\n')
{ theVal->Remove(i); if (i < n-2) aChar = theVal->Value(i); }
if (aChar == '\'' && i < n - 2) {
if (theVal->Value(i + 1) == '\'') { theVal->Remove(i + 1); continue; }
}
if (uncar == '\\' && i < n - 2) {
if (val->Value(i + 1) == '\\') { val->Remove(i + 1); continue; }
}
else if (uncar == '\\' && i < n - 3) {
if (val->Value(i + 2) == '\\') {
if (val->Value(i + 1) == 'N')
{ val->SetValue(i,'\n'); val->Remove(i+1,2); continue; }
if (val->Value(i + 1) == 'T')
{ val->SetValue(i,'\t'); val->Remove(i+1,2); continue; }
if (aChar == '\\' && i < n - 3) {
if (theVal->Value(i + 2) == '\\') {
if (theVal->Value(i + 1) == 'N')
{ theVal->SetValue(i,'\n'); theVal->Remove(i+1,2); continue; }
if (theVal->Value(i + 1) == 'T')
{ theVal->SetValue(i,'\t'); theVal->Remove(i+1,2); continue; }
}
}
}
// pass through without conversion the control directives
if (mySourceCodePage == Resource_FormatType_NoConversion)
return;
Standard_Integer aFirstCharInd = 1; // begin index of substring to conversion before the control directives
Standard_Integer aLastCharInd = 1; // end index of substring to conversion before the control directives
TCollection_ExtendedString aTempExtString; // string for characters within control directives
TCollection_ExtendedString anOutputExtString; // string for conversion in UTF-8
Resource_FormatType aLocalFormatType = Resource_FormatType_iso8859_1; // a code page for a "\S\" control directive
for (Standard_Integer i = 1; i <= theVal->Length(); ++i)
{
unsigned char aChar = theVal->Value(i);
if (aChar != '\\' || (theVal->Length() - i) < 3) // does not contain the control directive
{
continue;
}
Standard_Integer aLocalLastCharInd = i - 1;
Standard_Boolean isConverted = Standard_False;
// Encoding ISO 8859 characters within a string;
// ("\P{N}\") control directive;
// indicates code page for ("\S\") control directive;
// {N}: "A", "B", "C", "D", "E", "F", "G", "H", "I";
// "A" identifies ISO 8859-1; "B" identifies ISO 8859-2, etc.
if (theVal->Value(i + 1) == 'P' && theVal->Length() - i > 3 && theVal->Value(i + 3) == '\\')
{
Standard_Character aPageId = UpperCase (theVal->Value(i + 2));
if (aPageId >= 'A' && aPageId <= 'I')
{
aLocalFormatType = (Resource_FormatType)(Resource_FormatType_iso8859_1 + (aPageId - 'A'));
}
else
{
thecheck->AddWarning("String control directive \\P*\\ with an unsupported symbol in place of *");
}
isConverted = Standard_True;
i += 3;
}
// Encoding ISO 8859 characters within a string;
// ("\S\") control directive;
// converts followed a LATIN CODEPOINT character.
else if (theVal->Value(i + 1) == 'S' && theVal->Length() - i > 2 && theVal->Value(i + 2) == '\\')
{
Standard_Character aResChar = theVal->Value(i + 3) | 0x80;
const char aStrForCovert[2] = { aResChar, '\0' };
Resource_Unicode::ConvertFormatToUnicode(aLocalFormatType, aStrForCovert, aTempExtString);
isConverted = Standard_True;
i += 3;
}
// Encoding U+0000 to U+00FF in a string
// ("\X\") control directive;
// converts followed two hexadecimal character.
else if (theVal->Value(i + 1) == 'X' && theVal->Length() - i > 3 && theVal->Value(i + 2) == '\\')
{
Standard_Character aResChar = (char)convertCharacterTo16bit(theVal->Value(i + 3));
aResChar = (aResChar << 4) | (char)convertCharacterTo16bit(theVal->Value(i + 4));
const char aStrForCovert[2] = { aResChar, '\0' };
aTempExtString = TCollection_ExtendedString(aStrForCovert, Standard_False); // pass through without conversion
isConverted = Standard_True;
i += 4;
}
// Encoding ISO 10646 characters within a string
// ("\X{N}\") control directive;
// {N}: "0", "2", "4";
// "\X2\" or "\X4\" converts the hexadecimal character sequence that follows it;
// "\X0\" indicates the end of the "\X2\" or "\X4\" sequence.
else if (theVal->Value(i + 1) == 'X' && theVal->Length() - i > 2 && theVal->Value(i + 3) == '\\')
{
Standard_Integer aFirstInd = i + 3;
Standard_Integer aLastInd = i;
Standard_Boolean isClosed = Standard_False;
for (; i <= theVal->Length() && !isClosed; ++i) // find the end of the "\X2\" or "\X4\" by an external "i"
{
if (theVal->Length() - i > 2 && theVal->Value(i) == '\\' && theVal->Value(i + 1) == 'X' && theVal->Value(i + 2) == '0' && theVal->Value(i + 3) == '\\')
{
aLastInd = i - 1;
i = i + 2;
isClosed = Standard_True;
}
}
if (!isClosed) // "\X0\" not exists
{
aLastInd = theVal->Length();
}
TCollection_AsciiString aBitString;
aBitString = TCollection_AsciiString(theVal->ToCString() + aFirstInd, aLastInd - aFirstInd);
aBitString.UpperCase(); // make valid for conversion into 16-bit
// "\X2\" control directive;
// followed by multiples of four or three hexadecimal characters.
// Encoding in UTF-16
if (theVal->Value(aFirstInd - 1) == '2' && theVal->Length() - aFirstInd > 3)
{
Standard_Integer anIterStep = (aBitString.Length() % 4 == 0) ? 4 : 3;
if (aBitString.Length() % anIterStep)
{
aTempExtString.AssignCat('?');
thecheck->AddWarning("String control directive \\X2\\ is followed by number of digits not multiple of 4");
}
else
{
Standard_Integer aStrLen = aBitString.Length() / anIterStep;
Standard_Utf16Char aUtfCharacter = '\0';
for (Standard_Integer aCharInd = 1; aCharInd <= aStrLen * anIterStep; ++aCharInd)
{
aUtfCharacter |= convertCharacterTo16bit(aBitString.Value(aCharInd));
if (aCharInd % anIterStep == 0)
{
aTempExtString.AssignCat(aUtfCharacter);
aUtfCharacter = '\0';
}
aUtfCharacter = aUtfCharacter << 4;
}
}
}
// "\X4\" control directive;
// followed by multiples of eight hexadecimal characters.
// Encoding in UTF-32
else if (theVal->Value(aFirstInd - 1) == '4' && theVal->Length() - aFirstInd > 7)
{
if (aBitString.Length() % 8)
{
aTempExtString.AssignCat('?');
thecheck->AddWarning("String control directive \\X4\\ is followed by number of digits not multiple of 8");
}
else
{
Standard_Integer aStrLen = aBitString.Length() / 8;
Standard_Utf32Char aUtfCharacter[2] = {'\0', '\0'};
for (Standard_Integer aCharInd = 1; aCharInd <= aStrLen * 8; ++aCharInd)
{
aUtfCharacter[0] |= convertCharacterTo16bit(aBitString.Value(aCharInd));
if (aCharInd % 8 == 0)
{
NCollection_Utf32Iter aUtfIter(aUtfCharacter);
Standard_Utf16Char aStringBuffer[3];
Standard_Utf16Char* aUtfPntr = aUtfIter.GetUtf16(aStringBuffer);
*aUtfPntr++ = '\0';
TCollection_ExtendedString aUtfString(aStringBuffer);
aTempExtString.AssignCat(aUtfString);
aUtfCharacter[0] = '\0';
}
aUtfCharacter[0] = aUtfCharacter[0] << 4;
}
}
}
isConverted = Standard_True;
}
if (isConverted) // find the control directive
{
TCollection_ExtendedString anExtString;
if (aFirstCharInd <= aLocalLastCharInd)
{
Resource_Unicode::ConvertFormatToUnicode(mySourceCodePage, theVal->SubString(aFirstCharInd, aLocalLastCharInd)->ToCString(), anExtString);
}
anOutputExtString.AssignCat(anExtString);
anOutputExtString.AssignCat(aTempExtString);
aFirstCharInd = i + 1;
aLastCharInd = aFirstCharInd;
aTempExtString.Clear();
}
}
if (aLastCharInd <= theVal->Length())
{
Resource_Unicode::ConvertFormatToUnicode(mySourceCodePage, theVal->ToCString() + aLastCharInd - 1, aTempExtString);
anOutputExtString.AssignCat(aTempExtString);
}
theVal->Clear();
TCollection_AsciiString aTmpString(anOutputExtString, 0);
theVal->AssignCat(aTmpString.ToCString());
}
// ------------- METHODES -------------
@ -109,9 +307,9 @@ static void CleanText(const Handle(TCollection_HAsciiString)& val)
StepData_StepReaderData::StepData_StepReaderData
(const Standard_Integer nbheader, const Standard_Integer nbtotal,
const Standard_Integer nbpar)
const Standard_Integer nbpar, const Resource_FormatType theSourceCodePage)
: Interface_FileReaderData(nbtotal, nbpar), theidents(1, nbtotal),
thetypes(1, nbtotal) //, themults (1,nbtotal)
thetypes(1, nbtotal), mySourceCodePage(theSourceCodePage) //, themults (1,nbtotal)
{
// char textnum[10];
thenbscop = 0; thenbents = 0; thelastn = 0; thenbhead = nbheader;
@ -564,7 +762,9 @@ Standard_Integer StepData_StepReaderData::ReadSub(const Standard_Integer numsub,
case 6: {
if (FT != Interface_ParamText) { kod = 0; break; }
Handle(TCollection_HAsciiString) txt = new TCollection_HAsciiString(str);
CleanText(txt); hst->SetValue(ip, txt); break;
cleanText(txt);
hst->SetValue(ip, txt);
break;
}
case 7: {
Handle(Standard_Transient) ent = BoundEntity(FP.EntityNumber());
@ -636,7 +836,9 @@ Standard_Integer StepData_StepReaderData::ReadSub(const Standard_Integer numsub,
case Interface_ParamLogical: break;
case Interface_ParamText: {
Handle(TCollection_HAsciiString) txt = new TCollection_HAsciiString(str);
CleanText(txt); htr->SetValue(ip, txt); break;
cleanText(txt);
htr->SetValue(ip, txt);
break;
}
case Interface_ParamSub: {
Handle(Standard_Transient) sub;
@ -714,7 +916,9 @@ Standard_Boolean StepData_StepReaderData::ReadField(const Standard_Integer num,
case Interface_ParamVoid: break;
case Interface_ParamText:
txt = new TCollection_HAsciiString(str);
CleanText(txt); fild.Set(txt); break;
cleanText(txt);
fild.Set(txt);
break;
case Interface_ParamEnum:
if (!strcmp(str, ".T.")) fild.SetLogical(StepData_LTrue);
else if (!strcmp(str, ".F.")) fild.SetLogical(StepData_LFalse);
@ -841,7 +1045,7 @@ Standard_Boolean StepData_StepReaderData::ReadAny(const Standard_Integer num,
case Interface_ParamLogical: break;
case Interface_ParamText: {
Handle(TCollection_HAsciiString) txt = new TCollection_HAsciiString(str);
CleanText(txt);
cleanText(txt);
// PDN May 2000: for reading SOURCE_ITEM (external references)
if (!val.IsNull()) {
@ -1242,7 +1446,7 @@ Standard_Boolean StepData_StepReaderData::ReadString(const Standard_Integer num,
CleanText (val);
}*/
val = new TCollection_HAsciiString(FP.CValue());
CleanText(val);
cleanText(val);
} else {
if (acceptvoid && FP.ParamType() == Interface_ParamVoid) warn = Standard_True;
errmess = new String("Parameter n0.%d (%s) not a quoted String");

View File

@ -19,6 +19,7 @@
#include <Standard.hxx>
#include <Standard_Type.hxx>
#include <Resource_FormatType.hxx>
#include <TColStd_Array1OfInteger.hxx>
#include <Interface_IndexedMapOfAsciiString.hxx>
@ -63,7 +64,7 @@ public:
//! creation time, because it contains arrays)
//! nbheader is nb of records for Header, nbtotal for Header+Data
//! and nbpar gives the total count of parameters
Standard_EXPORT StepData_StepReaderData(const Standard_Integer nbheader, const Standard_Integer nbtotal, const Standard_Integer nbpar);
Standard_EXPORT StepData_StepReaderData(const Standard_Integer nbheader, const Standard_Integer nbtotal, const Standard_Integer nbpar, const Resource_FormatType theSourceCodePage = Resource_FormatType_UTF8);
//! Fills the fields of a record
Standard_EXPORT void SetRecord (const Standard_Integer num, const Standard_CString ident, const Standard_CString type, const Standard_Integer nbpar);
@ -349,6 +350,16 @@ private:
//! If found, returns its EntityNumber, else returns Zero.
Standard_EXPORT Standard_Integer FindEntityNumber (const Standard_Integer num, const Standard_Integer id) const;
//! Prepare string to use in OCCT exchange structure.
//! If code page is Resource_FormatType_NoConversion,
//! clean only special characters without conversion;
//! else convert a string to UTF8 using the code page
//! and handle the control directives.
Standard_EXPORT void cleanText(const Handle(TCollection_HAsciiString)& theVal) const;
private:
TColStd_Array1OfInteger theidents;
TColStd_Array1OfInteger thetypes;
Interface_IndexedMapOfAsciiString thenametypes;
@ -358,6 +369,7 @@ private:
Standard_Integer thenbhead;
Standard_Integer thenbscop;
Handle(Interface_Check) thecheck;
Resource_FormatType mySourceCodePage;
};

View File

@ -143,7 +143,7 @@ static Standard_Integer StepFile_Read (const char* theName,
Standard_Integer nbhead, nbrec, nbpar;
lir_file_nbr (&nbhead,&nbrec,&nbpar); // renvoi par lex/yacc
Handle(StepData_StepReaderData) undirec =
new StepData_StepReaderData(nbhead,nbrec,nbpar); // creation tableau de records
new StepData_StepReaderData(nbhead,nbrec,nbpar, stepmodel->SourceCodePage()); // creation tableau de records
for ( Standard_Integer nr = 1; nr <= nbrec; nr ++) {
int nbarg; char* ident; char* typrec = 0;

View File

@ -258,12 +258,13 @@ TCollection_ExtendedString::TCollection_ExtendedString
// Create an extendedstring from an AsciiString
//---------------------------------------------------------------------------
TCollection_ExtendedString::TCollection_ExtendedString
(const TCollection_AsciiString& theString)
(const TCollection_AsciiString& theString,
const Standard_Boolean isMultiByte)
{
mylength = nbSymbols (theString.ToCString());
mystring = allocateExtChars (mylength);
mystring[mylength] = 0;
if (ConvertToUnicode (theString.ToCString()))
if (isMultiByte && ConvertToUnicode (theString.ToCString()))
{
return;
}
@ -304,6 +305,20 @@ void TCollection_ExtendedString::AssignCat (const TCollection_ExtendedString& th
mystring[mylength] = 0;
}
// ----------------------------------------------------------------------------
// AssignCat
// ----------------------------------------------------------------------------
void TCollection_ExtendedString::AssignCat(const Standard_Utf16Char theChar)
{
if (theChar != '\0')
{
mystring = reallocateExtChars(mystring, mylength + 1);
mystring[mylength] = theChar;
mylength += 1;
mystring[mylength] = '\0';
}
}
// ----------------------------------------------------------------------------
// Cat
// ----------------------------------------------------------------------------

View File

@ -114,8 +114,9 @@ public:
//! Creation by converting an Ascii string to an extended
//! string. The string is treated as having UTF-8 coding.
//! If it is not a UTF-8 then each character is copied to ExtCharacter.
Standard_EXPORT TCollection_ExtendedString(const TCollection_AsciiString& astring);
//! If it is not a UTF-8 or multi byte then
//! each character is copied to ExtCharacter.
Standard_EXPORT TCollection_ExtendedString(const TCollection_AsciiString& astring, const Standard_Boolean isMultiByte = Standard_True);
//! Appends the other extended string to this extended string.
//! Note that this method is an alias of operator +=.
@ -126,6 +127,9 @@ void operator += (const TCollection_ExtendedString& other)
AssignCat(other);
}
//! Appends the utf16 char to this extended string.
Standard_EXPORT void AssignCat (const Standard_Utf16Char theChar);
//! Appends <other> to me.
Standard_EXPORT TCollection_ExtendedString Cat (const TCollection_ExtendedString& other) const;
TCollection_ExtendedString operator + (const TCollection_ExtendedString& other) const

122
tests/bugs/step/bug28454_1 Normal file
View File

@ -0,0 +1,122 @@
# Test banner: prints the issue title and a short description of the scenario.
# The list of target encodings matches the code pages exercised at the end of
# this script (ISO 8859-1 ... ISO 8859-9).
puts "================"
puts "0028454: Data Exchange - Names with Special Characters Cannot Be Read from STEP or IGES Files"
puts "Target encodings: ISO 8859-1, ISO 8859-2, ISO 8859-3, ISO 8859-4, ISO 8859-5, ISO 8859-6, ISO 8859-7, ISO 8859-8, ISO 8859-9"
puts "Test case:"
puts "1) Creates a temporary STEP file-template using WriteStep."
puts "2) Reads generated template and replaces @tmp_name@ entity in it with target language characters using Tcl."
puts "3) Generates 2 STEP files in UTF-8 and ISO 8859-(N) encodings (converted by Tcl)."
puts "4) Reads generated files using StepRead and validates entity name."
puts "================"
puts ""
# Returns the content of the file thePath as a single string.
# The channel is opened for reading with its default configuration
# (no encoding or translation is set here) and is closed before returning.
proc fileToString { thePath } {
  set aChannel [open $thePath r]
  # read at most as many characters as the file has bytes on disk
  set aSize    [file size $thePath]
  set aContent [read $aChannel $aSize]
  close $aChannel
  return $aContent
}
# Writes theContent to the file thePath (created or truncated), encoded as
# theCodePage and with LF line endings. puts appends a trailing newline.
proc fileFromString { thePath theContent theCodePage } {
  set aChannel [open $thePath w]
  fconfigure $aChannel -translation lf -encoding $theCodePage
  puts $aChannel $theContent
  close $aChannel
}
# Generates two STEP files from the template thePathFrom and validates how they are read.
#
# Every match of theNameFrom in the template is replaced with theNameTo; the result
# is written twice: as UTF-8 to theUtfPathTo and in theCodePage to the path
# "<theCpPathTo><theCodePage>.stp".
#
# The files are then read into three documents and the name at 0:1:1:1 is checked:
#   U   - UTF-8 file read with read.step.codepage UTF8:            name must match;
#   ISO - code-page file read with read.step.codepage theCodePage: name must match;
#   A   - code-page file read with read.step.codepage UTF8:        name must differ,
#         otherwise the test case itself does not distinguish the encodings.
# All three documents are closed at the end (errors on closing are ignored).
proc fileCreateAndCompare { thePathFrom theUtfPathTo theCpPathTo theNameFrom theNameTo theCodePage } {
  set aTemplate [fileToString $thePathFrom]
  regsub -all -- $theNameFrom $aTemplate $theNameTo aStepText

  set aCpPath "${theCpPathTo}${theCodePage}.stp"
  fileFromString $theUtfPathTo $aStepText "utf-8"
  fileFromString $aCpPath      $aStepText $theCodePage

  # both files are first read with the UTF8 setting
  param read.step.codepage UTF8
  ReadStep U $theUtfPathTo
  ReadStep A $aCpPath

  # the code-page file is read once more with the matching setting
  param read.step.codepage $theCodePage
  ReadStep ISO $aCpPath

  if { [GetName U 0:1:1:1] != "$theNameTo" } {
    puts "Error: unable to read UTF-8 STEP"
  }
  if { [GetName ISO 0:1:1:1] != "$theNameTo" } {
    puts "Error: unable to read $theCodePage STEP"
  }
  if { [GetName A 0:1:1:1] == "$theNameTo" } {
    puts "Error: broken test case"
  }

  catch { Close A }
  catch { Close U }
  catch { Close ISO }
}
pload XDE OCAF MODELING VISUALIZATION

# Placeholder name written into the template and the paths of the temporary files.
# aTmpFileISO8859N is only a prefix: fileCreateAndCompare appends "<codepage>.stp".
set aTmpNameTmpl "@tmp_name@"
set aTmpFileTmpl "${imagedir}/${casename}-tmp.stp"
set aTmpFileUtf8 "${imagedir}/${casename}-tmp-utf8.stp"
set aTmpFileISO8859N "${imagedir}/${casename}-tmp-"

# Each tested name is "Test" (ASCII) + a word written with characters
# of the target code page.
# Every byte string below is valid UTF-8, so that "encoding convertfrom utf-8"
# decodes it without relying on the fallback for malformed sequences.
set anEngName [encoding convertfrom utf-8 "\x54\x65\x73\x74"]
# ISO 8859-1
set iso8859_1 [encoding convertfrom utf-8 "\x50\x72\x6f\x62\xc3\xad\x68\xc3\xa1"]
# ISO 8859-2
set iso8859_2 [encoding convertfrom utf-8 "\x50\x72\x6f\x62\xc3\xad\x68\xc3\xa1"]
# ISO 8859-3
set iso8859_3 [encoding convertfrom utf-8 "\xc3\x96\x6c\xc3\xa7\x65\x6b"]
# ISO 8859-4
set iso8859_4 [encoding convertfrom utf-8 "\x6d\xc4\x93\xc4\xa3\x69\x6e\x69\x65\x74"]
# ISO 8859-5
set iso8859_5 [encoding convertfrom utf-8 "\xD0\xa2\xD0\xB5\xD1\x81\xD1\x82"]
# ISO 8859-6
set iso8859_6 [encoding convertfrom utf-8 "\xd8\xa7\xd8\xae\xd8\xaa\xd8\xa8\xd8\xa7\xd8\xb1"]
# ISO 8859-7
set iso8859_7 [encoding convertfrom utf-8 "\xce\xb4\xcf\x80\xce\xa8\xce\xae"]
# ISO 8859-8
set iso8859_8 [encoding convertfrom utf-8 "\xc2\xb1\xd7\xa4\xd7\x9e\xd7\x9c\xd7\xa9"]
# ISO 8859-9
set iso8859_9 [encoding convertfrom utf-8 "\xc4\x9f\xc5\x9f\xc4\x9e\xc5\x9e\xc3\x86"]

box b 1 2 3

# Close documents possibly left over from a previous run, including every
# document name used by fileCreateAndCompare (A, U, ISO).
catch { Close A }
catch { Close T }
catch { Close U }
catch { Close CP }
catch { Close ISO }

# Write the STEP template: a box named with the placeholder.
XNewDoc T
XAddShape T b 0
XSetColor T b 1 0 0
SetName T 0:1:1:1 "$aTmpNameTmpl"
GetName T 0:1:1:1
WriteStep T "$aTmpFileTmpl"

puts "ISO 8859-1"
set aName "$anEngName $iso8859_1"
fileCreateAndCompare "$aTmpFileTmpl" "$aTmpFileUtf8" "$aTmpFileISO8859N" "$aTmpNameTmpl" "$aName" "iso8859-1"

puts "ISO 8859-2"
set aName "$anEngName $iso8859_2"
fileCreateAndCompare "$aTmpFileTmpl" "$aTmpFileUtf8" "$aTmpFileISO8859N" "$aTmpNameTmpl" "$aName" "iso8859-2"

puts "ISO 8859-3"
set aName "$anEngName $iso8859_3"
fileCreateAndCompare "$aTmpFileTmpl" "$aTmpFileUtf8" "$aTmpFileISO8859N" "$aTmpNameTmpl" "$aName" "iso8859-3"

puts "ISO 8859-4"
set aName "$anEngName $iso8859_4"
fileCreateAndCompare "$aTmpFileTmpl" "$aTmpFileUtf8" "$aTmpFileISO8859N" "$aTmpNameTmpl" "$aName" "iso8859-4"

puts "ISO 8859-5"
set aName "$anEngName $iso8859_5"
fileCreateAndCompare "$aTmpFileTmpl" "$aTmpFileUtf8" "$aTmpFileISO8859N" "$aTmpNameTmpl" "$aName" "iso8859-5"

puts "ISO 8859-6"
set aName "$anEngName $iso8859_6"
fileCreateAndCompare "$aTmpFileTmpl" "$aTmpFileUtf8" "$aTmpFileISO8859N" "$aTmpNameTmpl" "$aName" "iso8859-6"

puts "ISO 8859-7"
set aName "$anEngName $iso8859_7"
fileCreateAndCompare "$aTmpFileTmpl" "$aTmpFileUtf8" "$aTmpFileISO8859N" "$aTmpNameTmpl" "$aName" "iso8859-7"

puts "ISO 8859-8"
set aName "$anEngName $iso8859_8"
fileCreateAndCompare "$aTmpFileTmpl" "$aTmpFileUtf8" "$aTmpFileISO8859N" "$aTmpNameTmpl" "$aName" "iso8859-8"

puts "ISO 8859-9"
set aName "$anEngName $iso8859_9"
fileCreateAndCompare "$aTmpFileTmpl" "$aTmpFileUtf8" "$aTmpFileISO8859N" "$aTmpNameTmpl" "$aName" "iso8859-9"

View File

@ -0,0 +1,78 @@
# Checks that STEP string control directives are decoded when reading names.
# Messages that mention a directive are written in braces: inside double quotes
# Tcl backslash substitution would drop the backslashes ("\X2\ " would be
# printed as "X2 "), while braces keep the text verbatim.
puts "================"
puts "0028454: Data Exchange - Names with Special Characters Cannot Be Read from STEP or IGES Files"
puts {Target control directives: (\X\), (\S\), (\P*\), (\X0\), (\X2\), (\X4\)}
puts "================"
puts ""

pload OCAF
catch {Close D}

# Read File
ReadStep D [locate_data_file bug28454_directives.stp]

# Checking
puts {# STEP string: 'AaBbCc\X2\00C4\X0\\X2\00E4\X0\\X2\00D6\X0\\X2\00F6\X0\\X2\00DC\X0\\X2\00FC\X0\\X2\00DF\X0\*,.-;:_'}
if { [GetName D 0:1:1:1] != "AaBbCcÄäÖöÜüß*,.-;:_" } {
  puts {Error: name has been lost on conversion \X2\ directives}
}

puts {# STEP string: '\X2\03C0\X0\'}
if { [GetName D 0:1:1:2] != "π" } {
  puts {Error: name has been lost on conversion \X2\ directive}
}

puts {# STEP string: 'Expression: \X2\03B103B2\X0\\\X2\03B3\X0\*\X2\03C0\X0\'}
if { [GetName D 0:1:1:3] != "Expression: αβ\\γ*π" } {
  puts {Error: name has been lost on conversion \X2\ directive}
}

puts {# STEP string: '\X4\0001F7CA\X0\'}
# Note hack with conversion to utf-8, needed to get the strings comparison work for Unicode symbols above BMP
if { [encoding convertto utf-8 [GetName D 0:1:1:4]] !=
     [encoding convertto utf-8 "🟊"] } {
  puts {Error: name has been lost on conversion \X4\ directive}
}

puts {# STEP string: 'Star, arrow, faces: \X4\0001F7CA0001f80A0001f6370001f926\X0\ end'}
# Note hack with conversion to utf-8, needed to get the strings comparison work for Unicode symbols above BMP
if { [encoding convertto utf-8 [GetName D 0:1:1:5]] !=
     [encoding convertto utf-8 "Star, arrow, faces: 🟊🠊😷🤦 end"] } {
  puts {Error: name has been lost on conversion \X4\ directive}
}

puts {# STEP string: '\S\Drger'}
if { [GetName D 0:1:1:6] != "Ärger" } {
  puts {Error: name has been lost on conversion \S\ directive}
}

puts {# STEP string: 'h\S\ttel'}
if { [GetName D 0:1:1:7] != "hôtel" } {
  puts {Error: name has been lost on conversion \S\ directive}
}

puts {# STEP string: '\PE\\S\*\S\U\S\b'}
if { [GetName D 0:1:1:8] != "Њет" } {
  puts {Error: name has been lost on conversion \PE\ and \S\ directives}
}

puts {# STEP string: 'see \X\A7 4.1'}
if { [GetName D 0:1:1:9] != "see § 4.1" } {
  puts {Error: name has been lost on conversion \X\ directive}
}

puts {# STEP string: 'line one\X\0Aline two'}
if { [GetName D 0:1:1:10] != "line one\nline two" } {
  puts {Error: name has been lost on conversion \X\ directives}
}

puts {# STEP string: 'Expression: \X\B13\X2\03C0\X0\*\X2\03C0\X0\+12'}
if { [GetName D 0:1:1:11] != "Expression: ±3π*π+12" } {
  puts {Error: name has been lost on conversion \X2\ and \X\ directives}
}

puts {# STEP string: 'Expression: \X2\03B1\X0\\PC\*\X2\03B2\X0\\S\w\X2\03B3'}
if { [GetName D 0:1:1:12] != "Expression: α*β÷γ" } {
  puts {Error: name has been lost on conversion \X2\,\PC\ and \S\ directives}
}

puts {# STEP string: 'T\PE\\S\5C\S\D'}
if { [GetName D 0:1:1:13] != "TЕCТ" } {
  puts {Error: name has been lost on conversion \S\ and \PE\ directives}
}

puts {# STEP string: 'A\S\DaBbCc\X2\0C4\X0\'h\S\t\X2\0E4\X0\\X4\0001F7CA\X0\\X\0A\X4\0001F7CA\X0\*,.-;:_\X2\0D6\X0\\PE\\S\5'}
if { [encoding convertto utf-8 [GetName D 0:1:1:14]] !=
     [encoding convertto utf-8 "AÄaBbCcÄ'hôä🟊\n🟊*,.-;:_ÖЕ"] } {
  puts {Error: name has been lost on conversion \X\,\X2\,\X4\ and \S\ directive}
}

puts {# STEP string: '\X2\3B13B23B3\X0\'}
if { [GetName D 0:1:1:15] != "αβγ" } {
  puts {Error: name has been lost on conversion \X2\ directive}
}

View File

@ -46,10 +46,10 @@ regsub -all -- $aTmpNameTmpl [fileToString "$aTmpFileTmpl"] "$aName" aContent
fileFromString "$aTmpFileUtf8" "$aContent" "utf-8"
fileFromString "$aTmpFileGb" "$aContent" "gb2312"
param read.stepcaf.codepage UTF8
param read.step.codepage UTF8
ReadStep U "$aTmpFileUtf8"
ReadStep A "$aTmpFileGb"
param read.stepcaf.codepage GB
param read.step.codepage GB
ReadStep G "$aTmpFileGb"
if { [GetName U 0:1:1:1] != "$aName" } { puts "Error: unable to read UTF-8 STEP" }

View File

@ -6,7 +6,7 @@ puts ""
pload OCAF
# Read file
param read.stepcaf.codepage CP1251
param read.step.codepage CP1251
ReadStep D [locate_data_file bug31670_russian.stp]
# Checking

View File

@ -29,10 +29,10 @@ proc fileCreateAndCompare { thePathFrom theUtfPathTo theCpPathTo theNameFrom the
fileFromString "$theUtfPathTo" "$aContent" "utf-8"
fileFromString "$theCpPathTo" "$aContent" "$aCodePage"
param read.stepcaf.codepage UTF8
param read.step.codepage UTF8
ReadStep U "$theUtfPathTo"
ReadStep A "$theCpPathTo"
param read.stepcaf.codepage "$theCodePage"
param read.step.codepage "$theCodePage"
ReadStep CP "$theCpPathTo"
if { [GetName U 0:1:1:1] != "$theNameTo" } { puts "Error: unable to read UTF-8 STEP" }

View File

@ -1,5 +1,5 @@
set filename bug28315_part_step-pmi.stp
set ref_data {
D_First 0:1:7:1 0 27 0 "Nyn\\X2\\11b\\X0\\j\\X2\\161\\X0\\S\\m pohled modelu" parallel 13.0973082 1.62908566 -570.319287 0.0 0.0 -1.0 0 1 0 0.0 342.494354 342.494354
D_First 0:1:7:1 0 27 0 "Nynější pohled modelu" parallel 13.0973082 1.62908566 -570.319287 0.0 0.0 -1.0 0 1 0 0.0 342.494354 342.494354
}

View File

@ -3,7 +3,7 @@ set filename bug30315.stp
set ref_data {
D_First 0:1:7:1 0 0 0 "ALL" parallel 0 0 1.9593266248700001 0 0 -1 1 -5.5511151231299994e-017 0 0 1000 1000
D_First 0:1:7:2 0 0 0 "CPC Process" parallel 0 0 1.9593266248700001 0 0 -1 1 -5.5511151231299994e-017 0 0 1000 1000
D_First 0:1:7:3 0 0 0 "CPC D\\X2\\00E9\\X0\\tente" parallel 0 0 1.9593266248700001 0 0 -1 1 -5.5511151231299994e-017 0 0 1000 1000
D_First 0:1:7:3 0 0 0 "CPC Détente" parallel 0 0 1.9593266248700001 0 0 -1 1 -5.5511151231299994e-017 0 0 1000 1000
D_First 0:1:7:4 0 0 0 "REF A/B/C" parallel 0 0 1.9593266248700001 0 0 -1 1 -5.5511151231299994e-017 0 0 1000 1000
D_First 0:1:7:5 0 0 0 "CTF A/B/C" parallel 0 0 1.9593266248700001 0 0 -1 1 -5.5511151231299994e-017 0 0 1000 1000
}