From c026141bb6233378e0c98ae8da953ff6bd949b87 Mon Sep 17 00:00:00 2001 From: dpasukhi Date: Mon, 23 Nov 2020 15:04:06 +0300 Subject: [PATCH] 0031923: DXF import - add support of code page DOS850 - Add support for converting from CP850 to UTF-8 and UTF-8 to CP850; - Add support for reading STEP file encoding by cp850 code page. --- src/Resource/Resource_CodePages.pxx | 38 +++++++++++++- src/Resource/Resource_FormatType.hxx | 1 + src/Resource/Resource_Unicode.cxx | 2 + src/STEPControl/STEPControl_Controller.cxx | 3 +- tests/bugs/step/bug31923 | 60 ++++++++++++++++++++++ 5 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 tests/bugs/step/bug31923 diff --git a/src/Resource/Resource_CodePages.pxx b/src/Resource/Resource_CodePages.pxx index 9b5a92fda9..2866b454c3 100644 --- a/src/Resource/Resource_CodePages.pxx +++ b/src/Resource/Resource_CodePages.pxx @@ -14,7 +14,7 @@ #include // Code pages ANSI -> UTF16 -static const Standard_ExtCharacter THE_CODEPAGES_ANSI[Resource_FormatType_iso8859_9 - Resource_FormatType_CP1250 + 1][128] = +static const Standard_ExtCharacter THE_CODEPAGES_ANSI[Resource_FormatType_CP850 - Resource_FormatType_CP1250 + 1][128] = { { // code page: cp1250 @@ -662,5 +662,41 @@ static const Standard_ExtCharacter THE_CODEPAGES_ANSI[Resource_FormatType_iso885 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x131, 0x15f, 0xff + }, + + { + // code page: cp850 + 0xc7, 0xfc, 0xe9, 0xe2, + 0xe4, 0xe0, 0xe5, 0xe7, + 0xea, 0xeb, 0xe8, 0xef, + 0xee, 0xec, 0xc4, 0xc5, + 0xc9, 0xe6, 0xc6, 0xf4, + 0xf6, 0xf2, 0xfb, 0xf9, + 0xff, 0xd6, 0xdc, 0xf8, + 0xa3, 0xd8, 0xd7, 0x192, + 0xe1, 0xed, 0xf3, 0xfa, + 0xf1, 0xd1, 0xaa, 0xba, + 0xbf, 0xae, 0xac, 0xbd, + 0xbc, 0xa1, 0xab, 0xbb, + 0x2591, 0x2592, 0x2593, 0x2502, + 0x2524, 0xc1, 0xc2, 0xc0, + 0xa9, 0x2563, 0x2551, 0x2557, + 0x255d, 0xa2, 0xa5, 0x2510, + 0x2514, 0x2534, 0x252c, 0x251c, + 0x2500, 0x253c, 0xe3, 0xc3, + 0x255a, 0x2554, 0x2569, 0x2566, + 0x2560, 0x2550, 0x256c, 0xa4, + 0xf0, 0xd0, 0xca, 
0xcb, + 0xc8, 0x131, 0xcd, 0xce, + 0xcf, 0x2518, 0x250c, 0x2588, + 0x2584, 0xa6, 0xcc, 0x2580, + 0xd3, 0xdf, 0xd4, 0xd2, + 0xf5, 0xd5, 0xb5, 0xfe, + 0xde, 0xda, 0xdb, 0xd9, + 0xfd, 0xdd, 0xaf, 0xb4, + 0xad, 0xb1, 0x2017, 0xbe, + 0xb6, 0xa7, 0xf7, 0xb8, + 0xb0, 0xa8, 0xb7, 0xb9, + 0xb3, 0xb2, 0x25a0, 0xa0 } }; \ No newline at end of file diff --git a/src/Resource/Resource_FormatType.hxx b/src/Resource/Resource_FormatType.hxx index 62e0c06e04..59af43d6e1 100644 --- a/src/Resource/Resource_FormatType.hxx +++ b/src/Resource/Resource_FormatType.hxx @@ -52,6 +52,7 @@ enum Resource_FormatType Resource_FormatType_iso8859_9, //!< ISO 8859-9 (Turkish) encoding // Addition code pages + Resource_FormatType_CP850, //!< CP850 (DOS Latin-1, Western European) encoding Resource_FormatType_GBK, //!< GBK (UnifiedChinese) encoding Resource_FormatType_Big5, //!< Big5 (TradChinese) encoding diff --git a/src/Resource/Resource_Unicode.cxx b/src/Resource/Resource_Unicode.cxx index 03761b8c10..ca3570a4d6 100644 --- a/src/Resource/Resource_Unicode.cxx +++ b/src/Resource/Resource_Unicode.cxx @@ -634,6 +634,7 @@ void Resource_Unicode::ConvertFormatToUnicode (const Resource_FormatType theForm case Resource_FormatType_iso8859_7: case Resource_FormatType_iso8859_8: case Resource_FormatType_iso8859_9: + case Resource_FormatType_CP850: { const int aCodePageIndex = (int)theFormat - (int)Resource_FormatType_CP1250; const Standard_ExtString aCodePage = THE_CODEPAGES_ANSI[aCodePageIndex]; @@ -718,6 +719,7 @@ Standard_Boolean Resource_Unicode::ConvertUnicodeToFormat(const Resource_FormatT case Resource_FormatType_iso8859_7: case Resource_FormatType_iso8859_8: case Resource_FormatType_iso8859_9: + case Resource_FormatType_CP850: { if (theMaxSize < theFromStr.Length()) { diff --git a/src/STEPControl/STEPControl_Controller.cxx b/src/STEPControl/STEPControl_Controller.cxx index c8df0600a1..2bdd45e2b8 100644 --- a/src/STEPControl/STEPControl_Controller.cxx +++ b/src/STEPControl/STEPControl_Controller.cxx @@ -245,9
+245,10 @@ STEPControl_Controller::STEPControl_Controller () Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-7"); // Resource_FormatType_iso8859_7 Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-8"); // Resource_FormatType_iso8859_8 Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-9"); // Resource_FormatType_iso8859_9 + Interface_Static::Init("step", "read.step.codepage", '&', "eval CP850"); // Resource_FormatType_CP850 Interface_Static::SetCVal("read.step.codepage", "UTF8"); - Standard_STATIC_ASSERT((int)Resource_FormatType_iso8859_9 - (int)Resource_FormatType_CP1250 == 17); // "Error: Invalid Codepage Enumeration" + Standard_STATIC_ASSERT((int)Resource_FormatType_CP850 - (int)Resource_FormatType_CP1250 == 18); // "Error: Invalid Codepage Enumeration" init = Standard_True; } diff --git a/tests/bugs/step/bug31923 b/tests/bugs/step/bug31923 new file mode 100644 index 0000000000..4ddd385b72 --- /dev/null +++ b/tests/bugs/step/bug31923 @@ -0,0 +1,60 @@ +puts "================" +puts "0031923: DXF import - add support of code page DOS850" +puts "" +puts "Test case:" +puts "1) Creates a temporary STEP file-template using WriteStep." +puts "2) Reads generated template and replaces @tmp_name@ entity in it with Latin characters using Tcl." +puts "3) Generates 2 STEP files in UTF-8 and ISO850 encodings (converted by Tcl)." +puts "4) Reads generated files using StepRead and validates entity name." 
+puts "================" +puts "" + +proc fileToString { thePath } { + set aFile [open "$thePath" r] + set aText [read $aFile [file size "$thePath"]] + close $aFile + return $aText +} + +proc fileFromString { thePath theContent theCodePage } { + set aFile [open "$thePath" w] + fconfigure $aFile -translation lf -encoding "$theCodePage" + puts $aFile $theContent + close $aFile +} + +pload XDE OCAF MODELING VISUALIZATION +set aTmpNameTmpl "@tmp_name@" +set aTmpFileTmpl "${imagedir}/${casename}-tmp.stp" +set aTmpFileUtf8 "${imagedir}/${casename}-tmp-utf8.stp" +set aTmpFileCP850 "${imagedir}/${casename}-tmp-cp850.stp" +# Überprüfung de codificação +set aName [encoding convertfrom utf-8 "\xc3\x9c\x62\x65\x72\x70\x72\xc3\xbc\x66\x75\x6e\x67 \x64\x65 \x63\x6f\x64\x69\x66\x69\x63\x61\xc3\xa7\xc3\xa3\x6f"] + +catch { Close A } +catch { Close T } +catch { Close U } +catch { Close C } + +box b 1 2 3 +XNewDoc T +XAddShape T b 0 +XSetColor T b 1 0 0 +SetName T 0:1:1:1 "$aTmpNameTmpl" +GetName T 0:1:1:1 +WriteStep T "$aTmpFileTmpl" + +regsub -all -- $aTmpNameTmpl [fileToString "$aTmpFileTmpl"] "$aName" aContent +fileFromString "$aTmpFileUtf8" "$aContent" "utf-8" +fileFromString "$aTmpFileCP850" "$aContent" "cp850" + +param read.step.codepage UTF8 +ReadStep U "$aTmpFileUtf8" +ReadStep A "$aTmpFileCP850" +param read.step.codepage CP850 +ReadStep C "$aTmpFileCP850" + +puts "Original name is 'Überprüfung de codificação'" +if { [GetName U 0:1:1:1] != "$aName" } { puts "Error: unable to read UTF-8 STEP" } +if { [GetName C 0:1:1:1] != "$aName" } { puts "Error: unable to read CP850 STEP" } +if { [GetName A 0:1:1:1] == "$aName" } { puts "Error: broken test case" }