From 475da0f1352e60eebf1432cf136042444c7c5488 Mon Sep 17 00:00:00 2001 From: dpasukhi Date: Sun, 10 Oct 2021 15:33:15 +0300 Subject: [PATCH] 0032310: Data Exchange - Invalid STEP export/import of backslashes in names [Regression since OCCT 7.5.0] Fixed a problem with handling slashes and apostrophes. Changed cleaning of the 'TEXT' fields of the step entity to process the string in direct (forward) order without remove operations. Changed the text locale conversion to be performed before cleaning directives and special symbols (all directives and special symbols are encoded in ASCII[0-125] and do not differ between locales). Special case: when a directive has two slashes before its name, the directive value is not handled and the text is converted as is (the two slashes are combined into one, so that, for example, "\\X2\00C3" is written as "\X2\00C3"). --- src/StepData/StepData_StepReaderData.cxx | 405 ++++++++++++----------- tests/bugs/step/bug28454_2 | 2 +- tests/bugs/step/bug32310 | 28 ++ 3 files changed, 241 insertions(+), 194 deletions(-) create mode 100644 tests/bugs/step/bug32310 diff --git a/src/StepData/StepData_StepReaderData.cxx b/src/StepData/StepData_StepReaderData.cxx index 9cbda7914b..dfa0ff782e 100644 --- a/src/StepData/StepData_StepReaderData.cxx +++ b/src/StepData/StepData_StepReaderData.cxx @@ -74,7 +74,7 @@ static Standard_Integer acceptvoid = 0; //! Convert unsigned character to hexadecimal system, //! if character hasn't representation in this system, returns 0. 
-static Standard_Integer convertCharacterTo16bit(const unsigned char theCharacter) +static Standard_Integer convertCharacterTo16bit(const Standard_ExtCharacter theCharacter) { switch (theCharacter) { @@ -88,12 +88,12 @@ static Standard_Integer convertCharacterTo16bit(const unsigned char theCharacter case '7': return 7; case '8': return 8; case '9': return 9; - case 'A': return 10; - case 'B': return 11; - case 'C': return 12; - case 'D': return 13; - case 'E': return 14; - case 'F': return 15; + case 'A': case 'a': return 10; + case 'B': case 'b': return 11; + case 'C': case 'c': return 12; + case 'D': case 'd': return 13; + case 'E': case 'e': return 14; + case 'F': case 'f': return 15; default : return 0; } } @@ -102,202 +102,221 @@ static Standard_Integer convertCharacterTo16bit(const unsigned char theCharacter //function : cleanText //purpose : //======================================================================= - void StepData_StepReaderData::cleanText(const Handle(TCollection_HAsciiString)& theVal) const { - Standard_Integer n = theVal->Length(); // string size before reduction - theVal->Remove(n); - theVal->Remove(1); - // Don't forget to treat the special characters - for (Standard_Integer i = n - 2; i > 0; i--) { - char aChar = theVal->Value(i); - if (aChar == '\n') - { theVal->Remove(i); if (i < n-2) aChar = theVal->Value(i); } - if (aChar == '\'' && i < n - 2) { - if (theVal->Value(i + 1) == '\'') { theVal->Remove(i + 1); continue; } - } - if (aChar == '\\' && i < n - 3) { - if (theVal->Value(i + 2) == '\\') { - if (theVal->Value(i + 1) == 'N') - { theVal->SetValue(i,'\n'); theVal->Remove(i+1,2); continue; } - if (theVal->Value(i + 1) == 'T') - { theVal->SetValue(i,'\t'); theVal->Remove(i+1,2); continue; } - } - } - } - - // pass through without conversion the control directives - if (mySourceCodePage == Resource_FormatType_NoConversion) + if (theVal->Length() == 2) + { + theVal->Clear(); return; - - Standard_Integer aFirstCharInd = 1; // begin 
index of substring to conversion before the control directives - Standard_Integer aLastCharInd = 1; // end index of substring to conversion before the control directives - TCollection_ExtendedString aTempExtString; // string for characters within control directives - TCollection_ExtendedString anOutputExtString; // string for conversion in UTF-8 - Resource_FormatType aLocalFormatType = Resource_FormatType_iso8859_1; // a code page for a "\S\" control directive - for (Standard_Integer i = 1; i <= theVal->Length(); ++i) - { - unsigned char aChar = theVal->Value(i); - if (aChar != '\\' || (theVal->Length() - i) < 3) // does not contain the control directive - { - continue; - } - Standard_Integer aLocalLastCharInd = i - 1; - Standard_Boolean isConverted = Standard_False; - // Encoding ISO 8859 characters within a string; - // ("\P{N}\") control directive; - // indicates code page for ("\S\") control directive; - // {N}: "A", "B", "C", "D", "E", "F", "G", "H", "I"; - // "A" identifies ISO 8859-1; "B" identifies ISO 8859-2, etc. - if (theVal->Value(i + 1) == 'P' && theVal->Length() - i > 3 && theVal->Value(i + 3) == '\\') - { - Standard_Character aPageId = UpperCase (theVal->Value(i + 2)); - if (aPageId >= 'A' && aPageId <= 'I') - { - aLocalFormatType = (Resource_FormatType)(Resource_FormatType_iso8859_1 + (aPageId - 'A')); - } - else - { - thecheck->AddWarning("String control directive \\P*\\ with an unsupported symbol in place of *"); - } - - isConverted = Standard_True; - i += 3; - } - // Encoding ISO 8859 characters within a string; - // ("\S\") control directive; - // converts followed a LATIN CODEPOINT character. 
- else if (theVal->Value(i + 1) == 'S' && theVal->Length() - i > 2 && theVal->Value(i + 2) == '\\') - { - Standard_Character aResChar = theVal->Value(i + 3) | 0x80; - const char aStrForCovert[2] = { aResChar, '\0' }; - Resource_Unicode::ConvertFormatToUnicode(aLocalFormatType, aStrForCovert, aTempExtString); - isConverted = Standard_True; - i += 3; - } - // Encoding U+0000 to U+00FF in a string - // ("\X\") control directive; - // converts followed two hexadecimal character. - else if (theVal->Value(i + 1) == 'X' && theVal->Length() - i > 3 && theVal->Value(i + 2) == '\\') - { - Standard_Character aResChar = (char)convertCharacterTo16bit(theVal->Value(i + 3)); - aResChar = (aResChar << 4) | (char)convertCharacterTo16bit(theVal->Value(i + 4)); - const char aStrForCovert[2] = { aResChar, '\0' }; - aTempExtString = TCollection_ExtendedString(aStrForCovert, Standard_False); // pass through without conversion - isConverted = Standard_True; - i += 4; - } - // Encoding ISO 10646 characters within a string - // ("\X{N}\") control directive; - // {N}: "0", "2", "4"; - // "\X2\" or "\X4\" converts followed a hexadecimal character sequence; - // "\X0\" indicate the end of the "\X2\" or "\X4\". 
- else if (theVal->Value(i + 1) == 'X' && theVal->Length() - i > 2 && theVal->Value(i + 3) == '\\') - { - Standard_Integer aFirstInd = i + 3; - Standard_Integer aLastInd = i; - Standard_Boolean isClosed = Standard_False; - for (; i <= theVal->Length() && !isClosed; ++i) // find the end of the "\X2\" or "\X4\" by an external "i" - { - if (theVal->Length() - i > 2 && theVal->Value(i) == '\\' && theVal->Value(i + 1) == 'X' && theVal->Value(i + 2) == '0' && theVal->Value(i + 3) == '\\') - { - aLastInd = i - 1; - i = i + 2; - isClosed = Standard_True; - } - } - if (!isClosed) // "\X0\" not exists - { - aLastInd = theVal->Length(); - } - TCollection_AsciiString aBitString; - aBitString = TCollection_AsciiString(theVal->ToCString() + aFirstInd, aLastInd - aFirstInd); - aBitString.UpperCase(); // make valid for conversion into 16-bit - // "\X2\" control directive; - // followed by multiples of four or three hexadecimal characters. - // Encoding in UTF-16 - if (theVal->Value(aFirstInd - 1) == '2' && theVal->Length() - aFirstInd > 3) - { - Standard_Integer anIterStep = (aBitString.Length() % 4 == 0) ? 4 : 3; - if (aBitString.Length() % anIterStep) - { - aTempExtString.AssignCat('?'); - thecheck->AddWarning("String control directive \\X2\\ is followed by number of digits not multiple of 4"); - } - else - { - Standard_Integer aStrLen = aBitString.Length() / anIterStep; - Standard_Utf16Char aUtfCharacter = '\0'; - for (Standard_Integer aCharInd = 1; aCharInd <= aStrLen * anIterStep; ++aCharInd) - { - aUtfCharacter |= convertCharacterTo16bit(aBitString.Value(aCharInd)); - if (aCharInd % anIterStep == 0) - { - aTempExtString.AssignCat(aUtfCharacter); - aUtfCharacter = '\0'; - } - aUtfCharacter = aUtfCharacter << 4; - } - } - } - // "\X4\" control directive; - // followed by multiples of eight hexadecimal characters. 
- // Encoding in UTF-32 - else if (theVal->Value(aFirstInd - 1) == '4' && theVal->Length() - aFirstInd > 7) - { - if (aBitString.Length() % 8) - { - aTempExtString.AssignCat('?'); - thecheck->AddWarning("String control directive \\X4\\ is followed by number of digits not multiple of 8"); - } - else - { - Standard_Integer aStrLen = aBitString.Length() / 8; - Standard_Utf32Char aUtfCharacter[2] = {'\0', '\0'}; - for (Standard_Integer aCharInd = 1; aCharInd <= aStrLen * 8; ++aCharInd) - { - aUtfCharacter[0] |= convertCharacterTo16bit(aBitString.Value(aCharInd)); - if (aCharInd % 8 == 0) - { - NCollection_Utf32Iter aUtfIter(aUtfCharacter); - Standard_Utf16Char aStringBuffer[3]; - Standard_Utf16Char* aUtfPntr = aUtfIter.GetUtf16(aStringBuffer); - *aUtfPntr++ = '\0'; - TCollection_ExtendedString aUtfString(aStringBuffer); - aTempExtString.AssignCat(aUtfString); - aUtfCharacter[0] = '\0'; - } - aUtfCharacter[0] = aUtfCharacter[0] << 4; - } - } - } - isConverted = Standard_True; - } - if (isConverted) // find the control directive - { - TCollection_ExtendedString anExtString; - if (aFirstCharInd <= aLocalLastCharInd) - { - Resource_Unicode::ConvertFormatToUnicode(mySourceCodePage, theVal->SubString(aFirstCharInd, aLocalLastCharInd)->ToCString(), anExtString); - } - anOutputExtString.AssignCat(anExtString); - anOutputExtString.AssignCat(aTempExtString); - aFirstCharInd = i + 1; - aLastCharInd = aFirstCharInd; - aTempExtString.Clear(); - } } - if (aLastCharInd <= theVal->Length()) + TCollection_ExtendedString aResString; + const Standard_Boolean toConversion = mySourceCodePage != Resource_FormatType_NoConversion; + Resource_Unicode::ConvertFormatToUnicode(mySourceCodePage, theVal->ToCString() + 1, aResString); + Standard_Integer aResStringSize = aResString.Length() - 1; // skip the last apostrophe + TCollection_ExtendedString aTempExtString; // string for characters within control directives + Standard_Integer aSetCharInd = 1; // index to set value to result string + 
Resource_FormatType aLocalFormatType = Resource_FormatType_iso8859_1; // a code page for a "\S\" control directive + for (Standard_Integer aStringInd = 1; aStringInd <= aResStringSize; ++aStringInd) { - Resource_Unicode::ConvertFormatToUnicode(mySourceCodePage, theVal->ToCString() + aLastCharInd - 1, aTempExtString); - anOutputExtString.AssignCat(aTempExtString); + const Standard_ExtCharacter aChar = aResString.Value(aStringInd); + aSetCharInd = aStringInd; + if (aChar == '\\' && aStringInd <= aResStringSize - 3) // can contains the control directive + { + Standard_Boolean isConverted = Standard_False; + const Standard_ExtCharacter aDirChar = aResString.Value(aStringInd + 1); + const Standard_Boolean isSecSlash = aResString.Value(aStringInd + 2) == '\\'; + const Standard_Boolean isThirdSlash = aResString.Value(aStringInd + 3) == '\\'; + // Encoding ISO 8859 characters within a string; + // ("\P{N}\") control directive; + // indicates code page for ("\S\") control directive; + // {N}: "A", "B", "C", "D", "E", "F", "G", "H", "I"; + // "A" identifies ISO 8859-1; "B" identifies ISO 8859-2, etc. + if (aDirChar == 'P' && isThirdSlash) + { + const Standard_Character aPageId = + UpperCase(static_cast(aResString.Value(aStringInd + 2) & 255)); + if (aPageId >= 'A' && aPageId <= 'I') + { + aLocalFormatType = (Resource_FormatType)(Resource_FormatType_iso8859_1 + (aPageId - 'A')); + } + else + { + thecheck->AddWarning("String control directive \\P*\\ with an unsupported symbol in place of *"); + } + isConverted = Standard_True; + aStringInd += 3; + } + // Encoding ISO 8859 characters within a string; + // ("\S\") control directive; + // converts followed a LATIN CODEPOINT character. 
+ else if (aDirChar == 'S' && isSecSlash) + { + Standard_Character aResChar = static_cast(aResString.Value(aStringInd + 3) | 0x80); + const char aStrForCovert[2] = { aResChar, '\0' }; + Resource_Unicode::ConvertFormatToUnicode(aLocalFormatType, aStrForCovert, aTempExtString); + isConverted = Standard_True; + aStringInd += 3; + } + // Encoding U+0000 to U+00FF in a string + // ("\X\") control directive; + // converts followed two hexadecimal character. + else if (aDirChar == 'X' && aStringInd <= aResStringSize - 4 && isSecSlash) + { + Standard_Character aResChar = (char)convertCharacterTo16bit(aResString.Value(aStringInd + 3)); + aResChar = (aResChar << 4) | (char)convertCharacterTo16bit(aResString.Value(aStringInd + 4)); + const char aStrForConvert[2] = { aResChar, '\0' }; + aTempExtString = TCollection_ExtendedString(aStrForConvert, Standard_False); // pass through without conversion + isConverted = Standard_True; + aStringInd += 4; + } + // Encoding ISO 10646 characters within a string + // ("\X{N}\") control directive; + // {N}: "0", "2", "4"; + // "\X2\" or "\X4\" converts followed a hexadecimal character sequence; + // "\X0\" indicate the end of the "\X2\" or "\X4\". 
+ else if (aDirChar == 'X' && isThirdSlash) + { + Standard_Integer aFirstInd = aStringInd + 3; + Standard_Integer aLastInd = aStringInd; + Standard_Boolean isClosed = Standard_False; + // find the end of the "\X2\" or "\X4\" by an external "aStringInd" + for (; aStringInd <= aResStringSize && !isClosed; ++aStringInd) + { + if (aResStringSize - aStringInd > 2 && aResString.Value(aStringInd) == '\\' && + aResString.Value(aStringInd + 1) == 'X' && aResString.Value(aStringInd + 2) == '0' && + aResString.Value(aStringInd + 3) == '\\') + { + aLastInd = aStringInd - 1; + aStringInd = aStringInd + 2; + isClosed = Standard_True; + } + } + if (!isClosed) // "\X0\" not exists + { + aLastInd = aStringInd = aResStringSize; + } + const Standard_Integer aStrLen = aLastInd - aFirstInd; + // "\X2\" control directive; + // followed by multiples of four or three hexadecimal characters. + // Encoding in UTF-16 + if (aResString.Value(aFirstInd - 1) == '2' && aResStringSize - aFirstInd > 3) + { + Standard_Integer anIterStep = (aStrLen % 4 == 0) ? 4 : 3; + if (aStrLen % anIterStep) + { + aTempExtString.AssignCat('?'); + thecheck->AddWarning("String control directive \\X2\\ is followed by number of digits not multiple of 4"); + } + else + { + Standard_Utf16Char aUtfCharacter = '\0'; + for (Standard_Integer aCharInd = 1; aCharInd <= aStrLen; ++aCharInd) + { + aUtfCharacter |= convertCharacterTo16bit(aResString.Value(aCharInd + aFirstInd)); + if (aCharInd % anIterStep == 0) + { + aTempExtString.AssignCat(aUtfCharacter); + aUtfCharacter = '\0'; + } + aUtfCharacter = aUtfCharacter << 4; + } + } + } + // "\X4\" control directive; + // followed by multiples of eight hexadecimal characters. 
+ // Encoding in UTF-32 + else if (aResString.Value(aFirstInd - 1) == '4' && aResStringSize - aFirstInd > 7) + { + if (aStrLen % 8) + { + aTempExtString.AssignCat('?'); + thecheck->AddWarning("String control directive \\X4\\ is followed by number of digits not multiple of 8"); + } + else + { + Standard_Utf32Char aUtfCharacter[2] = { '\0', '\0' }; + for (Standard_Integer aCharInd = 1; aCharInd <= aStrLen; ++aCharInd) + { + aUtfCharacter[0] |= convertCharacterTo16bit(aResString.Value(aCharInd + aFirstInd)); + if (aCharInd % 8 == 0) + { + NCollection_Utf32Iter aUtfIter(aUtfCharacter); + Standard_Utf16Char aStringBuffer[3]; + Standard_Utf16Char* aUtfPntr = aUtfIter.GetUtf16(aStringBuffer); + *aUtfPntr++ = '\0'; + TCollection_ExtendedString aUtfString(aStringBuffer); + aTempExtString.AssignCat(aUtfString); + aUtfCharacter[0] = '\0'; + } + aUtfCharacter[0] = aUtfCharacter[0] << 4; + } + } + } + isConverted = Standard_True; + } + if (isConverted) // find the control directive + { + if (toConversion) // else skip moving + { + aResStringSize -= aStringInd - aSetCharInd - aTempExtString.Length() + 1; // change the string size to remove unused symbols + aResString.SetValue(aSetCharInd, aTempExtString); + aSetCharInd += aTempExtString.Length(); // move to the new position + aResString.SetValue(aSetCharInd, aResString.ToExtString() + aStringInd); + aStringInd = aSetCharInd - 1; + aResString.Trunc(aResStringSize);; + } + aTempExtString.Clear(); + continue; + } + } + if (aStringInd <= aResStringSize - 1) + { + const Standard_ExtCharacter aCharNext = aResString.Value(aStringInd + 1); + if (aCharNext == aChar && (aChar == '\'' || aChar == '\\')) + { + aResString.SetValue(aSetCharInd, aResString.ToExtString() + aStringInd); // move the string,removing one symbol + aResStringSize--; // change the string size to remove unused symbol + aResString.Trunc(aResStringSize); + } + else if (aChar == '\\') + { + const Standard_Boolean isDirective = + aStringInd <= aResStringSize - 2 && 
aResString.Value(aStringInd + 2) == '\\'; + if (isDirective) + { + if (aCharNext == 'N') + { + aResString.SetValue(aSetCharInd++, '\n'); + aResString.SetValue(aSetCharInd, aResString.ToExtString() + aStringInd + 2); // move the string,removing two symbols + aResStringSize-=2; // change the string size to remove unused symbols + aResString.Trunc(aResStringSize); + continue; + } + else if (aCharNext == 'T') + { + aResString.SetValue(aSetCharInd++, '\t'); + aResString.SetValue(aSetCharInd, aResString.ToExtString() + aStringInd + 2); // move the string,removing two symbols + aResStringSize-=2; // change the string size to remove unused symbols + aResString.Trunc(aResStringSize); + continue; + } + } + } + } + if (aChar == '\n') + { + aResString.SetValue(aSetCharInd, aResString.ToExtString() + aStringInd); + aResStringSize--; + aResString.Trunc(aResStringSize); + aStringInd--; + } } theVal->Clear(); - TCollection_AsciiString aTmpString(anOutputExtString, 0); + aResString.Trunc(aResStringSize); // trunc the last apostrophe + TCollection_AsciiString aTmpString(aResString, 0); theVal->AssignCat(aTmpString.ToCString()); } - // ------------- METHODES ------------- //======================================================================= diff --git a/tests/bugs/step/bug28454_2 b/tests/bugs/step/bug28454_2 index 746790313a..4020dffcd2 100644 --- a/tests/bugs/step/bug28454_2 +++ b/tests/bugs/step/bug28454_2 @@ -20,7 +20,7 @@ if { [GetName D 0:1:1:2] != "π" } { puts "Error: name has been lost on conversion \X2\ directive" } puts {# STEP string: 'Expression: \X2\03B103B2\X0\\\X2\03B3\X0\*\X2\03C0\X0\'} -if { [GetName D 0:1:1:3] != "Expression: αβ\\γ*π" } { +if { [GetName D 0:1:1:3] != "Expression: αβ\\X2\\03B3*π" } { puts "Error: name has been lost on conversion \X2\ directive" } puts {# STEP string: '\X4\0001F7CA\X0\'} diff --git a/tests/bugs/step/bug32310 b/tests/bugs/step/bug32310 new file mode 100644 index 0000000000..93dafda023 --- /dev/null +++ b/tests/bugs/step/bug32310 @@ 
-0,0 +1,28 @@ +puts "====================================" +puts "0032310: Data Exchange - Invalid STEP export/import of backslashes in names \[Regression since OCCT 7.5.0\]" +puts "====================================" +puts "" + +pload DCAF +Close D -silent + +XNewDoc D +box box 1 1 1 +XAddShape D box + +SetName D 0:1:1:1 "a'''\\b\n\t\\c\\\\\\\\" +set original_name [GetName D 0:1:1:1] + +WriteStep D "$imagedir/${casename}.stp" +Close D + +ReadStep D "$imagedir/${casename}.stp" +set imported_name [GetName D 0:1:1:1] + +file delete "$imagedir/${casename}.stp" + +if {$original_name != $imported_name} { + puts "Error: 'Incorrect exporting name: $original_name != $imported_name" +} + +Close D