1
0
mirror of https://git.dev.opencascade.org/repos/occt.git synced 2025-04-04 18:06:22 +03:00

0031923: DXF import - add support of code page DOS850

- Add support for converting from CP850 to UTF-8 and UTF-8 to CP850;
- Add support for reading STEP files encoded in the CP850 code page.
This commit is contained in:
dpasukhi 2020-11-23 15:04:06 +03:00 committed by bugmaster
parent 1e08a76f1e
commit c026141bb6
5 changed files with 102 additions and 2 deletions

View File

@ -14,7 +14,7 @@
#include <Standard_TypeDef.hxx>
// Code pages ANSI -> UTF16
static const Standard_ExtCharacter THE_CODEPAGES_ANSI[Resource_FormatType_iso8859_9 - Resource_FormatType_CP1250 + 1][128] =
static const Standard_ExtCharacter THE_CODEPAGES_ANSI[Resource_FormatType_CP850 - Resource_FormatType_CP1250 + 1][128] =
{
{
// code page: cp1250
@ -662,5 +662,41 @@ static const Standard_ExtCharacter THE_CODEPAGES_ANSI[Resource_FormatType_iso885
0xf4, 0xf5, 0xf6, 0xf7,
0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0x131, 0x15f, 0xff
},
{
// code page: cp850
0xc7, 0xfc, 0xe9, 0xe2,
0xe4, 0xe0, 0xe5, 0xe7,
0xea, 0xeb, 0xe8, 0xef,
0xee, 0xec, 0xc4, 0xc5,
0xc9, 0xe6, 0xc6, 0xf4,
0xf6, 0xf2, 0xfb, 0xf9,
0xff, 0xd6, 0xdc, 0xf8,
0xa3, 0xd8, 0xd7, 0x192,
0xe1, 0xed, 0xf3, 0xfa,
0xf1, 0xd1, 0xaa, 0xba,
0xbf, 0xae, 0xac, 0xbd,
0xbc, 0xa1, 0xab, 0xbb,
0x2591, 0x2592, 0x2593, 0x2502,
0x2524, 0xc1, 0xc2, 0xc0,
0xa9, 0x2563, 0x2551, 0x2557,
0x255d, 0xa2, 0xa5, 0x2510,
0x2514, 0x2534, 0x252c, 0x251c,
0x2500, 0x253c, 0xe3, 0xc3,
0x255a, 0x2554, 0x2569, 0x2566,
0x2560, 0x2550, 0x256c, 0xa4,
0xf0, 0xd0, 0xca, 0xcb,
0xc8, 0x131, 0xcd, 0xce,
0xcf, 0x2518, 0x250c, 0x2588,
0x2584, 0xa6, 0xcc, 0x2580,
0xd3, 0xdf, 0xd4, 0xd2,
0xf5, 0xd5, 0xb5, 0xfe,
0xde, 0xda, 0xdb, 0xd9,
0xfd, 0xdd, 0xaf, 0xb4,
0xad, 0xb1, 0x2017, 0xbe,
0xb6, 0xa7, 0xf7, 0xb8,
0xb0, 0xa8, 0xb7, 0xb9,
0xb3, 0xb2, 0x25a0, 0xa0
}
};

View File

@ -52,6 +52,7 @@ enum Resource_FormatType
Resource_FormatType_iso8859_9, //!< ISO 8859-9 (Turkish) encoding
// Addition code pages
Resource_FormatType_CP850, //!< CP850 (DOS code page 850, Western European) encoding
Resource_FormatType_GBK, //!< GBK (UnifiedChinese) encoding
Resource_FormatType_Big5, //!< Big5 (TradChinese) encoding

View File

@ -634,6 +634,7 @@ void Resource_Unicode::ConvertFormatToUnicode (const Resource_FormatType theForm
case Resource_FormatType_iso8859_7:
case Resource_FormatType_iso8859_8:
case Resource_FormatType_iso8859_9:
case Resource_FormatType_CP850:
{
const int aCodePageIndex = (int)theFormat - (int)Resource_FormatType_CP1250;
const Standard_ExtString aCodePage = THE_CODEPAGES_ANSI[aCodePageIndex];
@ -718,6 +719,7 @@ Standard_Boolean Resource_Unicode::ConvertUnicodeToFormat(const Resource_FormatT
case Resource_FormatType_iso8859_7:
case Resource_FormatType_iso8859_8:
case Resource_FormatType_iso8859_9:
case Resource_FormatType_CP850:
{
if (theMaxSize < theFromStr.Length())
{

View File

@ -245,9 +245,10 @@ STEPControl_Controller::STEPControl_Controller ()
Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-7"); // Resource_FormatType_iso8859_7
Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-8"); // Resource_FormatType_iso8859_8
Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-9"); // Resource_FormatType_iso8859_9
Interface_Static::Init("step", "read.step.codepage", '&', "eval CP850"); // Resource_FormatType_CP850
Interface_Static::SetCVal("read.step.codepage", "UTF8");
Standard_STATIC_ASSERT((int)Resource_FormatType_iso8859_9 - (int)Resource_FormatType_CP1250 == 17); // "Error: Invalid Codepage Enumeration"
Standard_STATIC_ASSERT((int)Resource_FormatType_CP850 - (int)Resource_FormatType_CP1250 == 18); // "Error: Invalid Codepage Enumeration"
init = Standard_True;
}

60
tests/bugs/step/bug31923 Normal file
View File

@ -0,0 +1,60 @@
# Banner printed at the start of the test: the issue id and title, followed by
# the list of steps the script performs.
# NOTE(review): "ISO850" in step 3 refers to the Tcl "cp850" encoding that the
# script passes to fileFromString further down.
puts "================"
puts "0031923: DXF import - add support of code page DOS850"
puts ""
puts "Test case:"
puts "1) Creates a temporary STEP file-template using WriteStep."
puts "2) Reads generated template and replaces @tmp_name@ entity in it with Latin characters using Tcl."
puts "3) Generates 2 STEP files in UTF-8 and ISO850 encodings (converted by Tcl)."
puts "4) Reads generated files using StepRead and validates entity name."
puts "================"
puts ""
# Reads the file at thePath and returns its content as a string.
#
# The channel is opened for reading with the default channel settings (no
# explicit -encoding or -translation is configured here), and up to
# [file size] characters are read from it.
#
# The channel is always closed, even when determining the size or reading
# raises an error; in that case the original error is re-raised to the caller.
proc fileToString { thePath } {
  set aFile [open "$thePath" r]
  # Capture the outcome instead of letting an error skip the close below.
  catch { read $aFile [file size "$thePath"] } aText anOptions
  close $aFile
  # Re-issue the captured outcome: the text on success, or the original error
  # (message and return options) on failure.
  return -options $anOptions $aText
}
# Writes theContent to the file at thePath using the encoding theCodePage.
#
# The file is opened in write mode, so existing content is replaced. Line
# endings are written as LF, and puts appends one trailing newline after
# theContent.
#
# The channel is always closed, even when configuring the channel (for example
# with an unknown encoding name) or writing raises an error; in that case the
# original error is re-raised to the caller.
proc fileFromString { thePath theContent theCodePage } {
  set aFile [open "$thePath" w]
  # Capture the outcome instead of letting an error skip the close below.
  catch {
    fconfigure $aFile -translation lf -encoding "$theCodePage"
    puts $aFile $theContent
  } aResult anOptions
  close $aFile
  # Re-issue the captured outcome: an empty result on success, or the original
  # error (message and return options) on failure.
  return -options $anOptions $aResult
}
# Main test body: writes a STEP template with a placeholder name, produces
# UTF-8 and cp850 encoded variants carrying a non-ASCII name, reads them back
# under different read.step.codepage settings and compares the names.

# Load the DRAW toolkits providing the commands used below.
pload XDE OCAF MODELING VISUALIZATION
# Placeholder name written into the template and later substituted.
set aTmpNameTmpl "@tmp_name@"
# Temporary file paths; imagedir and casename are not defined in this script
# (presumably supplied by the test harness - confirm).
set aTmpFileTmpl "${imagedir}/${casename}-tmp.stp"
set aTmpFileUtf8 "${imagedir}/${casename}-tmp-utf8.stp"
set aTmpFileCP850 "${imagedir}/${casename}-tmp-cp850.stp"
# Expected entity name, given as UTF-8 bytes and decoded into a Tcl string:
# "Überprüfung de codificação"
set aName [encoding convertfrom utf-8 "\xc3\x9c\x62\x65\x72\x70\x72\xc3\xbc\x66\x75\x6e\x67 \x64\x65 \x63\x6f\x64\x69\x66\x69\x63\x61\xc3\xa7\xc3\xa3\x6f"]
# Close documents that may be left over from a previous run; errors (for
# example when a document does not exist) are ignored by catch.
catch { Close A }
catch { Close T }
catch { Close U }
catch { Close C }
# Build the template document T: a box shape with a color and the placeholder name.
box b 1 2 3
XNewDoc T
XAddShape T b 0
XSetColor T b 1 0 0
SetName T 0:1:1:1 "$aTmpNameTmpl"
GetName T 0:1:1:1
WriteStep T "$aTmpFileTmpl"
# Replace every occurrence of the placeholder in the template text with the
# non-ASCII name; the result is stored in aContent.
regsub -all -- $aTmpNameTmpl [fileToString "$aTmpFileTmpl"] "$aName" aContent
# Write the same content twice, once per target encoding.
fileFromString "$aTmpFileUtf8" "$aContent" "utf-8"
fileFromString "$aTmpFileCP850" "$aContent" "cp850"
# With the codepage parameter set to UTF8, read the UTF-8 file into U and the
# cp850 file into A (A is the deliberately mismatched read checked last).
param read.step.codepage UTF8
ReadStep U "$aTmpFileUtf8"
ReadStep A "$aTmpFileCP850"
# With the codepage parameter set to CP850, read the cp850 file into C.
param read.step.codepage CP850
ReadStep C "$aTmpFileCP850"
puts "Original name is 'Überprüfung de codificação'"
# U and C must reproduce the original name.
if { [GetName U 0:1:1:1] != "$aName" } { puts "Error: unable to read UTF-8 STEP" }
if { [GetName C 0:1:1:1] != "$aName" } { puts "Error: unable to read CP850 STEP" }
# A must NOT match: a match would mean the test cannot tell the code pages apart.
if { [GetName A 0:1:1:1] == "$aName" } { puts "Error: broken test case" }