From baf60a870caa0e7ff72858873f0160e5c3cbcb70 Mon Sep 17 00:00:00 2001 From: dpasukhi Date: Wed, 30 Sep 2020 15:54:25 +0300 Subject: [PATCH] 0031670: Data Exchange - cp1251 Cyrillic characters in STEP file Add support for converting pages from Windows encoding to Unicode --- src/Resource/FILES | 11 +- src/Resource/Resource_ANSI.pxx | 342 ++++++++++++++++++ .../{Resource_Big5.h => Resource_Big5.pxx} | 2 +- src/Resource/Resource_ConvertUnicode.c | 4 +- ...ratorOfDataMapOfAsciiStringAsciiString.hxx | 20 - ...orOfDataMapOfAsciiStringExtendedString.hxx | 20 - ...source_DataMapOfAsciiStringAsciiString.hxx | 2 - ...rce_DataMapOfAsciiStringExtendedString.hxx | 1 - src/Resource/Resource_FormatType.hxx | 11 + ...{Resource_GB2312.h => Resource_GB2312.pxx} | 2 +- .../{Resource_GBK.h => Resource_GBK.pxx} | 2 +- src/Resource/Resource_Manager.cxx | 1 - ...ource_Shiftjis.h => Resource_Shiftjis.pxx} | 2 +- src/Resource/Resource_Unicode.cxx | 62 +++- src/Resource/Resource_Unicode.hxx | 4 - .../STEPCAFControl_Controller.cxx | 9 + tests/bugs/step/bug31670 | 23 ++ tests/bugs/step/bug31670_1 | 122 +++++++ 18 files changed, 567 insertions(+), 73 deletions(-) create mode 100644 src/Resource/Resource_ANSI.pxx rename src/Resource/{Resource_Big5.h => Resource_Big5.pxx} (99%) delete mode 100644 src/Resource/Resource_DataMapIteratorOfDataMapOfAsciiStringAsciiString.hxx delete mode 100644 src/Resource/Resource_DataMapIteratorOfDataMapOfAsciiStringExtendedString.hxx rename src/Resource/{Resource_GB2312.h => Resource_GB2312.pxx} (99%) rename src/Resource/{Resource_GBK.h => Resource_GBK.pxx} (99%) rename src/Resource/{Resource_Shiftjis.h => Resource_Shiftjis.pxx} (99%) create mode 100644 tests/bugs/step/bug31670 create mode 100644 tests/bugs/step/bug31670_1 diff --git a/src/Resource/FILES b/src/Resource/FILES index 8fcbe3476c..aa2da19f8f 100755 --- a/src/Resource/FILES +++ b/src/Resource/FILES @@ -1,18 +1,17 @@ -Resource_Big5.h +Resource_ANSI.pxx +Resource_Big5.pxx Resource_ConvertUnicode.c 
Resource_ConvertUnicode.hxx -Resource_DataMapIteratorOfDataMapOfAsciiStringAsciiString.hxx -Resource_DataMapIteratorOfDataMapOfAsciiStringExtendedString.hxx Resource_DataMapOfAsciiStringAsciiString.hxx Resource_DataMapOfAsciiStringExtendedString.hxx Resource_FormatType.hxx -Resource_GB2312.h -Resource_GBK.h +Resource_GB2312.pxx +Resource_GBK.pxx Resource_LexicalCompare.cxx Resource_LexicalCompare.hxx Resource_Manager.cxx Resource_Manager.hxx Resource_NoSuchResource.hxx -Resource_Shiftjis.h +Resource_Shiftjis.pxx Resource_Unicode.cxx Resource_Unicode.hxx diff --git a/src/Resource/Resource_ANSI.pxx b/src/Resource/Resource_ANSI.pxx new file mode 100644 index 0000000000..d02353a889 --- /dev/null +++ b/src/Resource/Resource_ANSI.pxx @@ -0,0 +1,342 @@ +// Copyright (c) 2020 OPEN CASCADE SAS +// +// This file is part of Open CASCADE Technology software library. +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License version 2.1 as published +// by the Free Software Foundation, with special exception defined in the file +// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT +// distribution for complete text of the license and disclaimer of any warranty. +// +// Alternatively, this file may be used under the terms of Open CASCADE +// commercial license or contractual agreement. 
+ +#include + +// Code pages ANSI -> UTF16 +static const Standard_ExtCharacter THE_CODEPAGES_ANSI[9][128] = +{ + { + // code page: cp1250 + 0x20ac, 0x81, 0x201a, 0x83, + 0x201e, 0x2026, 0x2020, 0x2021, + 0x88, 0x2030, 0x160, 0x2039, + 0x15a, 0x164, 0x17d, 0x179, + 0x90, 0x2018, 0x2019, 0x201c, + 0x201d, 0x2022, 0x2013, 0x2014, + 0x98, 0x2122, 0x161, 0x203a, + 0x15b, 0x165, 0x17e, 0x17a, + 0xa0, 0x2c7, 0x2d8, 0x141, + 0xa4, 0x104, 0xa6, 0xa7, + 0xa8, 0xa9, 0x15e, 0xab, + 0xac, 0xad, 0xae, 0x17b, + 0xb0, 0xb1, 0x2db, 0x142, + 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0x105, 0x15f, 0xbb, + 0x13d, 0x2dd, 0x13e, 0x17c, + 0x154, 0xc1, 0xc2, 0x102, + 0xc4, 0x139, 0x106, 0xc7, + 0x10c, 0xc9, 0x118, 0xcb, + 0x11a, 0xcd, 0xce, 0x10e, + 0x110, 0x143, 0x147, 0xd3, + 0xd4, 0x150, 0xd6, 0xd7, + 0x158, 0x16e, 0xda, 0x170, + 0xdc, 0xdd, 0x162, 0xdf, + 0x155, 0xe1, 0xe2, 0x103, + 0xe4, 0x13a, 0x107, 0xe7, + 0x10d, 0xe9, 0x119, 0xeb, + 0x11b, 0xed, 0xee, 0x10f, + 0x111, 0x144, 0x148, 0xf3, + 0xf4, 0x151, 0xf6, 0xf7, + 0x159, 0x16f, 0xfa, 0x171, + 0xfc, 0xfd, 0x163, 0x2d9 + }, + + { + // code page: cp1251 + 0x402, 0x403, 0x201a, 0x453, + 0x201e, 0x2026, 0x2020, 0x2021, + 0x20ac, 0x2030, 0x409, 0x2039, + 0x40a, 0x40c, 0x40b, 0x40f, + 0x452, 0x2018, 0x2019, 0x201c, + 0x201d, 0x2022, 0x2013, 0x2014, + 0x98, 0x2122, 0x459, 0x203a, + 0x45a, 0x45c, 0x45b, 0x45f, + 0xa0, 0x40e, 0x45e, 0x408, + 0xa4, 0x490, 0xa6, 0xa7, + 0x401, 0xa9, 0x404, 0xab, + 0xac, 0xad, 0xae, 0x407, + 0xb0, 0xb1, 0x406, 0x456, + 0x491, 0xb5, 0xb6, 0xb7, + 0x451, 0x2116, 0x454, 0xbb, + 0x458, 0x405, 0x455, 0x457, + 0x410, 0x411, 0x412, 0x413, + 0x414, 0x415, 0x416, 0x417, + 0x418, 0x419, 0x41a, 0x41b, + 0x41c, 0x41d, 0x41e, 0x41f, + 0x420, 0x421, 0x422, 0x423, + 0x424, 0x425, 0x426, 0x427, + 0x428, 0x429, 0x42a, 0x42b, + 0x42c, 0x42d, 0x42e, 0x42f, + 0x430, 0x431, 0x432, 0x433, + 0x434, 0x435, 0x436, 0x437, + 0x438, 0x439, 0x43a, 0x43b, + 0x43c, 0x43d, 0x43e, 0x43f, + 0x440, 0x441, 0x442, 0x443, + 0x444, 0x445, 0x446, 0x447, + 
0x448, 0x449, 0x44a, 0x44b, + 0x44c, 0x44d, 0x44e, 0x44f + }, + + { + // code page: cp1252 + 0x20ac, 0x81, 0x201a, 0x192, + 0x201e, 0x2026, 0x2020, 0x2021, + 0x2c6, 0x2030, 0x160, 0x2039, + 0x152, 0x8d, 0x17d, 0x8f, + 0x90, 0x2018, 0x2019, 0x201c, + 0x201d, 0x2022, 0x2013, 0x2014, + 0x2dc, 0x2122, 0x161, 0x203a, + 0x153, 0x9d, 0x17e, 0x178, + 0xa0, 0xa1, 0xa2, 0xa3, + 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, + 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, + 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, + 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, + 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, + 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, + 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, + 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, + 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, + 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, + 0xfc, 0xfd, 0xfe, 0xff + }, + + { + // code page: cp1253 + 0x20ac, 0x81, 0x201a, 0x192, + 0x201e, 0x2026, 0x2020, 0x2021, + 0x88, 0x2030, 0x8a, 0x2039, + 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x2018, 0x2019, 0x201c, + 0x201d, 0x2022, 0x2013, 0x2014, + 0x98, 0x2122, 0x9a, 0x203a, + 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0x385, 0x386, 0xa3, + 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0x0, 0xab, + 0xac, 0xad, 0xae, 0x2015, + 0xb0, 0xb1, 0xb2, 0xb3, + 0x384, 0xb5, 0xb6, 0xb7, + 0x388, 0x389, 0x38a, 0xbb, + 0x38c, 0xbd, 0x38e, 0x38f, + 0x390, 0x391, 0x392, 0x393, + 0x394, 0x395, 0x396, 0x397, + 0x398, 0x399, 0x39a, 0x39b, + 0x39c, 0x39d, 0x39e, 0x39f, + 0x3a0, 0x3a1, 0x0, 0x3a3, + 0x3a4, 0x3a5, 0x3a6, 0x3a7, + 0x3a8, 0x3a9, 0x3aa, 0x3ab, + 0x3ac, 0x3ad, 0x3ae, 0x3af, + 0x3b0, 0x3b1, 0x3b2, 0x3b3, + 0x3b4, 0x3b5, 0x3b6, 0x3b7, + 0x3b8, 0x3b9, 0x3ba, 0x3bb, + 0x3bc, 0x3bd, 0x3be, 0x3bf, + 0x3c0, 0x3c1, 0x3c2, 0x3c3, + 0x3c4, 0x3c5, 0x3c6, 0x3c7, + 0x3c8, 0x3c9, 0x3ca, 0x3cb, + 0x3cc, 0x3cd, 0x3ce, 0x0 + }, + + { + // code page: cp1254 + 0x20ac, 0x81, 0x201a, 
0x192, + 0x201e, 0x2026, 0x2020, 0x2021, + 0x2c6, 0x2030, 0x160, 0x2039, + 0x152, 0x8d, 0x8e, 0x8f, + 0x90, 0x2018, 0x2019, 0x201c, + 0x201d, 0x2022, 0x2013, 0x2014, + 0x2dc, 0x2122, 0x161, 0x203a, + 0x153, 0x9d, 0x9e, 0x178, + 0xa0, 0xa1, 0xa2, 0xa3, + 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, + 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, + 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, + 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, + 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, + 0xcc, 0xcd, 0xce, 0xcf, + 0x11e, 0xd1, 0xd2, 0xd3, + 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, + 0xdc, 0x130, 0x15e, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, + 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, + 0xec, 0xed, 0xee, 0xef, + 0x11f, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, + 0xfc, 0x131, 0x15f, 0xff + }, + + { + // code page: cp1255 + 0x20ac, 0x81, 0x201a, 0x192, + 0x201e, 0x2026, 0x2020, 0x2021, + 0x2c6, 0x2030, 0x8a, 0x2039, + 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x2018, 0x2019, 0x201c, + 0x201d, 0x2022, 0x2013, 0x2014, + 0x2dc, 0x2122, 0x9a, 0x203a, + 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, + 0x20aa, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xd7, 0xab, + 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, + 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xf7, 0xbb, + 0xbc, 0xbd, 0xbe, 0xbf, + 0x5b0, 0x5b1, 0x5b2, 0x5b3, + 0x5b4, 0x5b5, 0x5b6, 0x5b7, + 0x5b8, 0x5b9, 0x5ba, 0x5bb, + 0x5bc, 0x5bd, 0x5be, 0x5bf, + 0x5c0, 0x5c1, 0x5c2, 0x5c3, + 0x5f0, 0x5f1, 0x5f2, 0x5f3, + 0x5f4, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x5d0, 0x5d1, 0x5d2, 0x5d3, + 0x5d4, 0x5d5, 0x5d6, 0x5d7, + 0x5d8, 0x5d9, 0x5da, 0x5db, + 0x5dc, 0x5dd, 0x5de, 0x5df, + 0x5e0, 0x5e1, 0x5e2, 0x5e3, + 0x5e4, 0x5e5, 0x5e6, 0x5e7, + 0x5e8, 0x5e9, 0x5ea, 0x0, + 0x0, 0x200e, 0x200f, 0x0 + }, + + { + // code page: cp1256 + 0x20ac, 0x67e, 0x201a, 0x192, + 0x201e, 0x2026, 0x2020, 0x2021, + 0x2c6, 0x2030, 0x679, 0x2039, + 0x152, 0x686, 0x698, 0x688, + 0x6af, 0x2018, 0x2019, 0x201c, 
+ 0x201d, 0x2022, 0x2013, 0x2014, + 0x6a9, 0x2122, 0x691, 0x203a, + 0x153, 0x200c, 0x200d, 0x6ba, + 0xa0, 0x60c, 0xa2, 0xa3, + 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0x6be, 0xab, + 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, + 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0x61b, 0xbb, + 0xbc, 0xbd, 0xbe, 0x61f, + 0x6c1, 0x621, 0x622, 0x623, + 0x624, 0x625, 0x626, 0x627, + 0x628, 0x629, 0x62a, 0x62b, + 0x62c, 0x62d, 0x62e, 0x62f, + 0x630, 0x631, 0x632, 0x633, + 0x634, 0x635, 0x636, 0xd7, + 0x637, 0x638, 0x639, 0x63a, + 0x640, 0x641, 0x642, 0x643, + 0xe0, 0x644, 0xe2, 0x645, + 0x646, 0x647, 0x648, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, + 0x649, 0x64a, 0xee, 0xef, + 0x64b, 0x64c, 0x64d, 0x64e, + 0xf4, 0x64f, 0x650, 0xf7, + 0x651, 0xf9, 0x652, 0xfb, + 0xfc, 0x200e, 0x200f, 0x6d2 + }, + + { + // code page: cp1257 + 0x20ac, 0x81, 0x201a, 0x83, + 0x201e, 0x2026, 0x2020, 0x2021, + 0x88, 0x2030, 0x8a, 0x2039, + 0x8c, 0xa8, 0x2c7, 0xb8, + 0x90, 0x2018, 0x2019, 0x201c, + 0x201d, 0x2022, 0x2013, 0x2014, + 0x98, 0x2122, 0x9a, 0x203a, + 0x9c, 0xaf, 0x2db, 0x9f, + 0xa0, 0x0, 0xa2, 0xa3, + 0xa4, 0x0, 0xa6, 0xa7, + 0xd8, 0xa9, 0x156, 0xab, + 0xac, 0xad, 0xae, 0xc6, + 0xb0, 0xb1, 0xb2, 0xb3, + 0xb4, 0xb5, 0xb6, 0xb7, + 0xf8, 0xb9, 0x157, 0xbb, + 0xbc, 0xbd, 0xbe, 0xe6, + 0x104, 0x12e, 0x100, 0x106, + 0xc4, 0xc5, 0x118, 0x112, + 0x10c, 0xc9, 0x179, 0x116, + 0x122, 0x136, 0x12a, 0x13b, + 0x160, 0x143, 0x145, 0xd3, + 0x14c, 0xd5, 0xd6, 0xd7, + 0x172, 0x141, 0x15a, 0x16a, + 0xdc, 0x17b, 0x17d, 0xdf, + 0x105, 0x12f, 0x101, 0x107, + 0xe4, 0xe5, 0x119, 0x113, + 0x10d, 0xe9, 0x17a, 0x117, + 0x123, 0x137, 0x12b, 0x13c, + 0x161, 0x144, 0x146, 0xf3, + 0x14d, 0xf5, 0xf6, 0xf7, + 0x173, 0x142, 0x15b, 0x16b, + 0xfc, 0x17c, 0x17e, 0x2d9 + }, + + { + // code page: cp1258 + 0x20ac, 0x81, 0x201a, 0x192, + 0x201e, 0x2026, 0x2020, 0x2021, + 0x2c6, 0x2030, 0x8a, 0x2039, + 0x152, 0x8d, 0x8e, 0x8f, + 0x90, 0x2018, 0x2019, 0x201c, + 0x201d, 0x2022, 0x2013, 0x2014, + 0x2dc, 0x2122, 0x9a, 0x203a, + 0x153, 0x9d, 0x9e, 
0x178, + 0xa0, 0xa1, 0xa2, 0xa3, + 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, + 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, + 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, + 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0x102, + 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, + 0x300, 0xcd, 0xce, 0xcf, + 0x110, 0xd1, 0x309, 0xd3, + 0xd4, 0x1a0, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, + 0xdc, 0x1af, 0x303, 0xdf, + 0xe0, 0xe1, 0xe2, 0x103, + 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, + 0x301, 0xed, 0xee, 0xef, + 0x111, 0xf1, 0x323, 0xf3, + 0xf4, 0x1a1, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, + 0xfc, 0x1b0, 0x20ab, 0xff + } +}; \ No newline at end of file diff --git a/src/Resource/Resource_Big5.h b/src/Resource/Resource_Big5.pxx similarity index 99% rename from src/Resource/Resource_Big5.h rename to src/Resource/Resource_Big5.pxx index b3b5146fba..dd2a9ac1a2 100644 --- a/src/Resource/Resource_Big5.h +++ b/src/Resource/Resource_Big5.pxx @@ -13,7 +13,7 @@ // Alternatively, this file may be used under the terms of Open CASCADE // commercial license or contractual agreement. 
-static unsigned int big5uni [19782] = { +static const unsigned int big5uni [19782] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, diff --git a/src/Resource/Resource_ConvertUnicode.c b/src/Resource/Resource_ConvertUnicode.c index 02851394fa..a9e0e38d6d 100644 --- a/src/Resource/Resource_ConvertUnicode.c +++ b/src/Resource/Resource_ConvertUnicode.c @@ -20,8 +20,8 @@ typedef unsigned short char16 ; -#include -#include +#include "Resource_Shiftjis.pxx" +#include "Resource_GB2312.pxx" #define isjis(c) (((c)>=0x21 && (c)<=0x7e)) #define iseuc(c) (((c)>=0xa1 && (c)<=0xfe)) diff --git a/src/Resource/Resource_DataMapIteratorOfDataMapOfAsciiStringAsciiString.hxx b/src/Resource/Resource_DataMapIteratorOfDataMapOfAsciiStringAsciiString.hxx deleted file mode 100644 index fe6c81c71e..0000000000 --- a/src/Resource/Resource_DataMapIteratorOfDataMapOfAsciiStringAsciiString.hxx +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (c) 2015 OPEN CASCADE SAS -// -// This file is part of Open CASCADE Technology software library. -// -// This library is free software; you can redistribute it and/or modify it under -// the terms of the GNU Lesser General Public License version 2.1 as published -// by the Free Software Foundation, with special exception defined in the file -// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT -// distribution for complete text of the license and disclaimer of any warranty. -// -// Alternatively, this file may be used under the terms of Open CASCADE -// commercial license or contractual agreement. 
- - -#ifndef Resource_DataMapIteratorOfDataMapOfAsciiStringAsciiString_HeaderFile -#define Resource_DataMapIteratorOfDataMapOfAsciiStringAsciiString_HeaderFile - -#include - -#endif diff --git a/src/Resource/Resource_DataMapIteratorOfDataMapOfAsciiStringExtendedString.hxx b/src/Resource/Resource_DataMapIteratorOfDataMapOfAsciiStringExtendedString.hxx deleted file mode 100644 index 2942e2008f..0000000000 --- a/src/Resource/Resource_DataMapIteratorOfDataMapOfAsciiStringExtendedString.hxx +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (c) 2015 OPEN CASCADE SAS -// -// This file is part of Open CASCADE Technology software library. -// -// This library is free software; you can redistribute it and/or modify it under -// the terms of the GNU Lesser General Public License version 2.1 as published -// by the Free Software Foundation, with special exception defined in the file -// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT -// distribution for complete text of the license and disclaimer of any warranty. -// -// Alternatively, this file may be used under the terms of Open CASCADE -// commercial license or contractual agreement. 
- - -#ifndef Resource_DataMapIteratorOfDataMapOfAsciiStringExtendedString_HeaderFile -#define Resource_DataMapIteratorOfDataMapOfAsciiStringExtendedString_HeaderFile - -#include - -#endif diff --git a/src/Resource/Resource_DataMapOfAsciiStringAsciiString.hxx b/src/Resource/Resource_DataMapOfAsciiStringAsciiString.hxx index 7ad22fb721..9cd00eaebf 100644 --- a/src/Resource/Resource_DataMapOfAsciiStringAsciiString.hxx +++ b/src/Resource/Resource_DataMapOfAsciiStringAsciiString.hxx @@ -17,8 +17,6 @@ #ifndef Resource_DataMapOfAsciiStringAsciiString_HeaderFile #define Resource_DataMapOfAsciiStringAsciiString_HeaderFile -#include -#include #include #include diff --git a/src/Resource/Resource_DataMapOfAsciiStringExtendedString.hxx b/src/Resource/Resource_DataMapOfAsciiStringExtendedString.hxx index 0fe19d230f..7b1e3b672d 100644 --- a/src/Resource/Resource_DataMapOfAsciiStringExtendedString.hxx +++ b/src/Resource/Resource_DataMapOfAsciiStringExtendedString.hxx @@ -19,7 +19,6 @@ #include #include -#include #include typedef NCollection_DataMap Resource_DataMapOfAsciiStringExtendedString; diff --git a/src/Resource/Resource_FormatType.hxx b/src/Resource/Resource_FormatType.hxx index a2da6b05f0..9537f2b93d 100644 --- a/src/Resource/Resource_FormatType.hxx +++ b/src/Resource/Resource_FormatType.hxx @@ -29,6 +29,17 @@ enum Resource_FormatType Resource_FormatType_UTF8, //!< multi-byte UTF-8 encoding Resource_FormatType_SystemLocale, //!< active system-defined locale; this value is strongly NOT recommended to use + // non ASCII format types + Resource_FormatType_CP1250, //!< cp1250 (Central European) encoding + Resource_FormatType_CP1251, //!< cp1251 (Cyrillic) encoding + Resource_FormatType_CP1252, //!< cp1252 (Western European) encoding + Resource_FormatType_CP1253, //!< cp1253 (Greek) encoding + Resource_FormatType_CP1254, //!< cp1254 (Turkish) encoding + Resource_FormatType_CP1255, //!< cp1255 (Hebrew) encoding + Resource_FormatType_CP1256, //!< cp1256 (Arabic) encoding + 
Resource_FormatType_CP1257, //!< cp1257 (Baltic) encoding + Resource_FormatType_CP1258, //!< cp1258 (Vietnamese) encoding + // old aliases Resource_SJIS = Resource_FormatType_SJIS, Resource_EUC = Resource_FormatType_EUC, diff --git a/src/Resource/Resource_GB2312.h b/src/Resource/Resource_GB2312.pxx similarity index 99% rename from src/Resource/Resource_GB2312.h rename to src/Resource/Resource_GB2312.pxx index 1faa0f69aa..fa1779f715 100644 --- a/src/Resource/Resource_GB2312.h +++ b/src/Resource/Resource_GB2312.pxx @@ -14,7 +14,7 @@ commercial license or contractual agreement. */ -static char16 unigb [65536] = { +static const char16 unigb [65536] = { 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , diff --git a/src/Resource/Resource_GBK.h b/src/Resource/Resource_GBK.pxx similarity index 99% rename from src/Resource/Resource_GBK.h rename to src/Resource/Resource_GBK.pxx index d7cee6829c..78734560d7 100644 --- a/src/Resource/Resource_GBK.h +++ b/src/Resource/Resource_GBK.pxx @@ -13,7 +13,7 @@ // Alternatively, this file may be used under the terms of Open CASCADE // commercial license or contractual agreement. -static unsigned short gbkuni [23940] = { +static const unsigned short gbkuni [23940] = { 0x4E02, 0x4E04, 0x4E05, 0x4E06, 0x4E0F, 0x4E12, 0x4E17, 0x4E1F, 0x4E20, 0x4E21, 0x4E23, 0x4E26, diff --git a/src/Resource/Resource_Manager.cxx b/src/Resource/Resource_Manager.cxx index 731d8929d9..6d94e9aa07 100644 --- a/src/Resource/Resource_Manager.cxx +++ b/src/Resource/Resource_Manager.cxx @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Resource/Resource_Shiftjis.h b/src/Resource/Resource_Shiftjis.pxx similarity index 99% rename from src/Resource/Resource_Shiftjis.h rename to src/Resource/Resource_Shiftjis.pxx index 8923327353..deef59fad7 100644 --- a/src/Resource/Resource_Shiftjis.h +++ b/src/Resource/Resource_Shiftjis.pxx @@ -14,7 +14,7 @@ commercial license or contractual agreement. 
*/ -static char16 unisjis [65536] = { +static const char16 unisjis [65536] = { 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , diff --git a/src/Resource/Resource_Unicode.cxx b/src/Resource/Resource_Unicode.cxx index 6d8a735d6e..c3e1c7fb96 100644 --- a/src/Resource/Resource_Unicode.cxx +++ b/src/Resource/Resource_Unicode.cxx @@ -15,14 +15,16 @@ // commercial license or contractual agreement. -#include -#include #include -#include #include #include #include #include +#include +#include +#include "Resource_ANSI.pxx" +#include "Resource_GBK.pxx" +#include "Resource_Big5.pxx" #define isjis(c) (((c)>=0x21 && (c)<=0x7e)) #define iseuc(c) (((c)>=0xa1 && (c)<=0xfe)) @@ -355,14 +357,6 @@ Standard_Boolean Resource_Unicode::ConvertBig5ToUnicode(const Standard_CString f return Standard_True; } -void Resource_Unicode::ConvertANSIToUnicode(const Standard_CString fromstr,TCollection_ExtendedString& tostr) -{ - tostr.Clear(); - - TCollection_ExtendedString curext(fromstr); - tostr.AssignCat(curext); -} - Standard_Boolean Resource_Unicode::ConvertUnicodeToSJIS(const TCollection_ExtendedString& fromstr, Standard_PCharacter& tostr, const Standard_Integer maxsize) @@ -618,9 +612,39 @@ void Resource_Unicode::ConvertFormatToUnicode (const Resource_FormatType theForm break; } case Resource_FormatType_ANSI: + { + theToStr = TCollection_ExtendedString(theFromStr, Standard_False); + break; + } + case Resource_FormatType_CP1250: + case Resource_FormatType_CP1251: + case Resource_FormatType_CP1252: + case Resource_FormatType_CP1253: + case Resource_FormatType_CP1254: + case Resource_FormatType_CP1255: + case Resource_FormatType_CP1256: + case Resource_FormatType_CP1257: + case Resource_FormatType_CP1258: + { + const int aCodePageIndex = (int)theFormat - (int)Resource_FormatType_CP1250; + const Standard_ExtString aCodePage = THE_CODEPAGES_ANSI[aCodePageIndex]; + theToStr.Clear(); + for (const char* anInputPntr = theFromStr; *anInputPntr != '\0'; ++anInputPntr) + { + 
Standard_ExtCharacter aRes = (*anInputPntr & 0x80) != 0 + ? aCodePage[(0x7f & *anInputPntr)] + : *anInputPntr; + if (aRes == (Standard_ExtCharacter)0x0) + { + aRes = '?'; + } + theToStr.Insert(theToStr.Length() + 1, aRes); + } + break; + } case Resource_FormatType_UTF8: { - theToStr = TCollection_ExtendedString (theFromStr, theFormat == Resource_FormatType_UTF8); + theToStr = TCollection_ExtendedString (theFromStr, Standard_True); break; } case Resource_FormatType_SystemLocale: @@ -654,7 +678,19 @@ Standard_Boolean Resource_Unicode::ConvertUnicodeToFormat(const Resource_FormatT } case Resource_FormatType_ANSI: { - return ConvertUnicodeToANSI (theFromStr, theToStr, theMaxSize); + return ConvertUnicodeToANSI(theFromStr, theToStr, theMaxSize); + } + case Resource_FormatType_CP1250: + case Resource_FormatType_CP1251: + case Resource_FormatType_CP1252: + case Resource_FormatType_CP1253: + case Resource_FormatType_CP1254: + case Resource_FormatType_CP1255: + case Resource_FormatType_CP1256: + case Resource_FormatType_CP1257: + case Resource_FormatType_CP1258: + { + throw Standard_NotImplemented("Resource_Unicode::ConvertUnicodeToFormat - conversion from Unicode to CP1250 - CP1258 is not implemented"); } case Resource_FormatType_UTF8: { diff --git a/src/Resource/Resource_Unicode.hxx b/src/Resource/Resource_Unicode.hxx index aaec2f2a7f..f9edcd2602 100644 --- a/src/Resource/Resource_Unicode.hxx +++ b/src/Resource/Resource_Unicode.hxx @@ -59,10 +59,6 @@ public: //! to Unicode ExtendedString . Standard_EXPORT static Standard_Boolean ConvertBig5ToUnicode (const Standard_CString fromstr, TCollection_ExtendedString& tostr); - //! Converts non-ASCII CString in ANSI format - //! to Unicode ExtendedString . - Standard_EXPORT static void ConvertANSIToUnicode (const Standard_CString fromstr, TCollection_ExtendedString& tostr); - //! Converts Unicode ExtendedString to non-ASCII //! CString in SJIS format, limited to //! characters. 
To translate the whole , use more diff --git a/src/STEPCAFControl/STEPCAFControl_Controller.cxx b/src/STEPCAFControl/STEPCAFControl_Controller.cxx index 5e43b80697..c9c1398220 100644 --- a/src/STEPCAFControl/STEPCAFControl_Controller.cxx +++ b/src/STEPCAFControl/STEPCAFControl_Controller.cxx @@ -79,6 +79,15 @@ Standard_Boolean STEPCAFControl_Controller::Init () Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval GB"); // Resource_FormatType_GB Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval UTF8"); // Resource_FormatType_UTF8 Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval SystemLocale"); // Resource_FormatType_SystemLocale + Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1250"); // Resource_FormatType_CP1250 + Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1251"); // Resource_FormatType_CP1251 + Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1252"); // Resource_FormatType_CP1252 + Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1253"); // Resource_FormatType_CP1253 + Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1254"); // Resource_FormatType_CP1254 + Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1255"); // Resource_FormatType_CP1255 + Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1256"); // Resource_FormatType_CP1256 + Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1257"); // Resource_FormatType_CP1257 + Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1258"); // Resource_FormatType_CP1258 Interface_Static::SetCVal ("read.stepcaf.codepage", "UTF8"); return Standard_True; diff --git a/tests/bugs/step/bug31670 b/tests/bugs/step/bug31670 new file mode 100644 index 0000000000..dadeb88fe5 --- /dev/null +++ b/tests/bugs/step/bug31670 @@ -0,0 +1,23 @@ +puts "================" +puts "0031670: Data Exchange - 
cp1251 Cyrillic characters in STEP file" +puts "================" +puts "" + +pload OCAF + +# Read file +param read.stepcaf.codepage CP1251 +ReadStep D [locate_data_file bug31670_russian.stp] + +# Checking +set aNameAssambly [encoding convertfrom utf-8 "\xd0\xa1\xd0\xb1\xd0\xbe\xd1\x80\xd0\xba\xd0\xb0\x31"] +set aNameShape [encoding convertfrom utf-8 "\xd0\x94\xd0\xb5\xd1\x82"] + +set isOK 1 +if { [GetName D 0:1:1:1] != "$aNameAssambly" } { set isOK 0 } +for { set i 1 } { $i < 9 } { incr i } { + if { [GetName D 0:1:1:[expr $i + 1]] != "$aNameShape$i" } { set isOK 0 } +} +if { $isOK == 0 } { puts "Error: unable to read CP1251 STEP" } + +Close D \ No newline at end of file diff --git a/tests/bugs/step/bug31670_1 b/tests/bugs/step/bug31670_1 new file mode 100644 index 0000000000..91c1ed262a --- /dev/null +++ b/tests/bugs/step/bug31670_1 @@ -0,0 +1,122 @@ +puts "================" +puts "0031670: Data Exchange - cp1251 Cyrillic characters in STEP file" +puts "Target encodings: cp1250, cp1251, cp1252, cp1253, cp1254, cp1255, cp1256, cp1257, cp1258" +puts "Test case:" +puts "1) Creates a temporary STEP file-template using WriteStep." +puts "2) Reads generated template and replaces @tmp_name@ entity in it with target language characters using Tcl." +puts "3) Generates 2 STEP files in UTF-8 and CP125(N) encodings (converted by Tcl)." +puts "4) Reads generated files using ReadStep and validates entity name." 
+puts "================" +puts "" + +proc fileToString { thePath } { + set aFile [open "$thePath" r] + set aText [read $aFile [file size "$thePath"]] + close $aFile + return $aText +} + +proc fileFromString { thePath theContent theCodePage } { + set aFile [open "$thePath" w] + fconfigure $aFile -translation lf -encoding "$theCodePage" + puts $aFile $theContent + close $aFile +} + +proc fileCreateAndCompare { thePathFrom theUtfPathTo theCpPathTo theNameFrom theNameTo theCodePage } { + set aCodePage [string tolower "$theCodePage"] + regsub -all -- $theNameFrom [fileToString "$thePathFrom"] "$theNameTo" aContent + fileFromString "$theUtfPathTo" "$aContent" "utf-8" + fileFromString "$theCpPathTo" "$aContent" "$aCodePage" + + param read.stepcaf.codepage UTF8 + ReadStep U "$theUtfPathTo" + ReadStep A "$theCpPathTo" + param read.stepcaf.codepage "$theCodePage" + ReadStep CP "$theCpPathTo" + + if { [GetName U 0:1:1:1] != "$theNameTo" } { puts "Error: unable to read UTF-8 STEP" } + if { [GetName CP 0:1:1:1] != "$theNameTo" } { puts "Error: unable to read $theCodePage STEP" } + if { [GetName A 0:1:1:1] == "$theNameTo" } { puts "Error: broken test case" } + catch { Close A } + catch { Close U } + catch { Close CP } +} + +pload XDE OCAF MODELING VISUALIZATION +set aTmpNameTmpl "@tmp_name@" +set aTmpFileTmpl "${imagedir}/${casename}-tmp.stp" +set aTmpFileUtf8 "${imagedir}/${casename}-tmp-utf8.stp" +set aTmpFileCP125N "${imagedir}/${casename}-tmp-CP125N.stp" + +# "Test" (english multi-encoding) + "Test" (encoding in the target language) + +# multi-encoding +set anEngName [encoding convertfrom utf-8 "\x54\x65\x73\x74"] +# cp1250 +set aLat1Name [encoding convertfrom utf-8 "\x50\x72\x6f\x62\xed\x68\xe1"] +# cp1251 +set aCyrName [encoding convertfrom utf-8 "\xD0\xa2\xD0\xB5\xD1\x81\xD1\x82"] +# cp1252 +set aLat2Name [encoding convertfrom utf-8 "\x50\x72\x6f\x62\xed\x68\xe1"] +# cp1253 +set aGreekName [encoding convertfrom utf-8 "\xce\x94\xce\xbf\xce\xba\xce\xb9\xce\xbc\xce\xae"] +# 
cp1254 +set aTurkName [encoding convertfrom utf-8 "\xd6\x6c\xe7\x65\x6b"] +# cp1255 +set aHebrName [encoding convertfrom utf-8 "\xd7\x9e\xd6\xb4\xd7\x91\xd6\xb0\xd7\x97\xd6\xb8\xd7\x9f"] +# cp1256 +set anArabName [encoding convertfrom utf-8 "\xd8\xa7\xd8\xae\xd8\xaa\xd8\xa8\xd8\xa7\xd8\xb1"] +# cp1257 +set aBaltName [encoding convertfrom utf-8 "\x50\xc4\x81\x72\x62\x61\x75\x64\x65"] +# cp1258 +set aViettName [encoding convertfrom utf-8 "\u0054\u0068\u00ed \u006e\u0067\u0068\u0069\u1ec7\u006d"] + +box b 1 2 3 +catch { Close A } +catch { Close T } +catch { Close U } +catch { Close CP } + +XNewDoc T +XAddShape T b 0 +XSetColor T b 1 0 0 +SetName T 0:1:1:1 "$aTmpNameTmpl" +GetName T 0:1:1:1 +WriteStep T "$aTmpFileTmpl" + +puts "Central European" +set aName "$anEngName $aLat1Name" +fileCreateAndCompare "$aTmpFileTmpl" "$aTmpFileUtf8" "$aTmpFileCP125N" "$aTmpNameTmpl" "$aName" "CP1250" + +puts "Cyrillic" +set aName "$anEngName $aCyrName" +fileCreateAndCompare "$aTmpFileTmpl" "$aTmpFileUtf8" "$aTmpFileCP125N" "$aTmpNameTmpl" "$aName" "CP1251" + +puts "Western European" +set aName "$anEngName $aLat2Name" +fileCreateAndCompare "$aTmpFileTmpl" "$aTmpFileUtf8" "$aTmpFileCP125N" "$aTmpNameTmpl" "$aName" "CP1252" + +puts "Greek" +set aName "$anEngName $aGreekName" +fileCreateAndCompare "$aTmpFileTmpl" "$aTmpFileUtf8" "$aTmpFileCP125N" "$aTmpNameTmpl" "$aName" "CP1253" + +puts "Turkish" +set aName "$anEngName $aTurkName" +fileCreateAndCompare "$aTmpFileTmpl" "$aTmpFileUtf8" "$aTmpFileCP125N" "$aTmpNameTmpl" "$aName" "CP1254" + +puts "Hebrew" +set aName "$anEngName $aHebrName" +fileCreateAndCompare "$aTmpFileTmpl" "$aTmpFileUtf8" "$aTmpFileCP125N" "$aTmpNameTmpl" "$aName" "CP1255" + +puts "Arabic" +set aName "$anEngName $anArabName" +fileCreateAndCompare "$aTmpFileTmpl" "$aTmpFileUtf8" "$aTmpFileCP125N" "$aTmpNameTmpl" "$aName" "CP1256" + +puts "Baltic" +set aName "$anEngName $aBaltName" +fileCreateAndCompare "$aTmpFileTmpl" "$aTmpFileUtf8" "$aTmpFileCP125N" "$aTmpNameTmpl" 
"$aName" "CP1257" + +puts "Vietnamese" +set aName "$anEngName $aViettName" +fileCreateAndCompare "$aTmpFileTmpl" "$aTmpFileUtf8" "$aTmpFileCP125N" "$aTmpNameTmpl" "$aName" "CP1258" \ No newline at end of file