1
0
mirror of https://git.dev.opencascade.org/repos/occt.git synced 2025-07-20 12:45:50 +03:00

Data Exchange, Step Export - Preserving control directives (#601)

- Introduced `CleanTextForSend` static helper with detailed documentation.
- Updated `StepData_StepWriter::Send` to use the new helper and simplified quoting/line‐wrapping logic.
- Added comprehensive GTests for `CleanTextForSend` and updated the test suite configuration.
This commit is contained in:
Pasukhin Dmitry 2025-07-12 17:05:39 +01:00 committed by GitHub
parent 3d3a47a33a
commit 08f6de3aff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 346 additions and 88 deletions

View File

@ -3,6 +3,7 @@ set(OCCT_TKDESTEP_GTests_FILES_LOCATION "${CMAKE_CURRENT_LIST_DIR}")
set(OCCT_TKDESTEP_GTests_FILES
STEPConstruct_RenderingProperties_Test.cxx
StepData_StepWriter_Test.cxx
StepTidy_BaseTestFixture.pxx
StepTidy_Axis2Placement3dReducer_Test.cxx
StepTidy_CartesianPointReducer_Test.cxx

View File

@ -0,0 +1,160 @@
// Copyright (c) 2025 OPEN CASCADE SAS
//
// This file is part of Open CASCADE Technology software library.
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License version 2.1 as published
// by the Free Software Foundation, with special exception defined in the file
// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
// distribution for complete text of the license and disclaimer of any warranty.
//
// Alternatively, this file may be used under the terms of Open CASCADE
// commercial license or contractual agreement.
#include <StepData_StepWriter.hxx>
#include <TCollection_AsciiString.hxx>
#include <gtest/gtest.h>
// Checks that the four special characters (quote, backslash, newline, tab)
// are rewritten into their STEP text representation.
TEST(StepData_StepWriterTest, CleanTextForSend_BasicEscaping)
{
  struct EscapeCase
  {
    const char* Source;   // raw text handed to CleanTextForSend
    const char* Expected; // text expected back
  };

  const EscapeCase aCases[] = {
    // a single quote is doubled
    {"text with 'single quotes'", "text with ''single quotes''"},
    // a backslash is doubled
    {"path\\with\\backslashes", "path\\\\with\\\\backslashes"},
    // a newline character becomes the N directive
    {"line1\nline2", "line1\\N\\line2"},
    // a tab character becomes the T directive
    {"text\twith\ttabs", "text\\T\\with\\T\\tabs"},
  };

  for (const EscapeCase& aCase : aCases)
  {
    const TCollection_AsciiString aSource(aCase.Source);
    const TCollection_AsciiString aCleaned = StepData_StepWriter::CleanTextForSend(aSource);
    EXPECT_STREQ(aCleaned.ToCString(), aCase.Expected);
  }
}
// Checks that text already containing control directives comes back unchanged.
TEST(StepData_StepWriterTest, CleanTextForSend_ControlDirectivePreservation)
{
  // Every entry must be returned exactly as given.
  const char* const anUnchanged[] = {
    "text with \\XA7\\ section sign",  // X directive with two hex digits
    "\\X2\\03C0\\X0\\ is pi",          // X2 ... X0 sequence
    "emoji \\X4\\001F600\\X0\\ face",  // X4 ... X0 sequence
    "text with \\S\\ directive",       // S directive
    "\\PA\\ code page setting",        // P directive
  };

  for (const char* aText : anUnchanged)
  {
    const TCollection_AsciiString aSource(aText);
    const TCollection_AsciiString aCleaned = StepData_StepWriter::CleanTextForSend(aSource);
    EXPECT_STREQ(aCleaned.ToCString(), aText);
  }
}
// Checks that N and T directives already present in the input are not escaped again.
TEST(StepData_StepWriterTest, CleanTextForSend_ExistingDirectivePreservation)
{
  // Every entry must be returned exactly as given.
  const char* const anUnchanged[] = {
    "line1\\N\\line2",        // existing N directive
    "text\\T\\with\\T\\tab",  // existing T directives
  };

  for (const char* aText : anUnchanged)
  {
    const TCollection_AsciiString aSource(aText);
    const TCollection_AsciiString aCleaned = StepData_StepWriter::CleanTextForSend(aSource);
    EXPECT_STREQ(aCleaned.ToCString(), aText);
  }
}
// Checks inputs that combine preserved directives with characters needing escaping.
TEST(StepData_StepWriterTest, CleanTextForSend_MixedContent)
{
  struct MixedCase
  {
    const char* Source;   // raw text handed to CleanTextForSend
    const char* Expected; // text expected back
  };

  const MixedCase aCases[] = {
    // quotes located outside a directive are doubled
    {"see \\XA7\\ section and 'quotes'", "see \\XA7\\ section and ''quotes''"},
    // a backslash located outside a directive is doubled
    {"\\XA7\\ and path\\file", "\\XA7\\ and path\\\\file"},
    // directive, quotes and a tab in the same string
    {"prefix \\X2\\03B103B2\\X0\\ 'text' with\ttab",
     "prefix \\X2\\03B103B2\\X0\\ ''text'' with\\T\\tab"},
  };

  for (const MixedCase& aCase : aCases)
  {
    const TCollection_AsciiString aSource(aCase.Source);
    const TCollection_AsciiString aCleaned = StepData_StepWriter::CleanTextForSend(aSource);
    EXPECT_STREQ(aCleaned.ToCString(), aCase.Expected);
  }
}
// Checks boundary inputs: empty text, quotes only, and strings made only of directives.
TEST(StepData_StepWriterTest, CleanTextForSend_EdgeCases)
{
  struct EdgeCase
  {
    const char* Source;   // raw text handed to CleanTextForSend
    const char* Expected; // text expected back
  };

  const EdgeCase aCases[] = {
    // empty string stays empty
    {"", ""},
    // a string consisting only of two quotes: each one is doubled
    {"''", "''''"},
    // a string consisting only of one directive
    {"\\XA7\\", "\\XA7\\"},
    // two directives with nothing between them
    {"\\XA7\\\\XB6\\", "\\XA7\\\\XB6\\"},
  };

  for (const EdgeCase& aCase : aCases)
  {
    const TCollection_AsciiString aSource(aCase.Source);
    const TCollection_AsciiString aCleaned = StepData_StepWriter::CleanTextForSend(aSource);
    EXPECT_STREQ(aCleaned.ToCString(), aCase.Expected);
  }
}
// Checks that sequences which only look like the start of a directive are
// handled as ordinary text, i.e. their backslash is doubled.
TEST(StepData_StepWriterTest, CleanTextForSend_MalformedInput)
{
  struct MalformedCase
  {
    const char* Source;   // raw text handed to CleanTextForSend
    const char* Expected; // text expected back
  };

  const MalformedCase aCases[] = {
    // directive opener without any closing backslash
    {"incomplete \\X and 'quotes'", "incomplete \\\\X and ''quotes''"},
    // directive cut short after one hex digit
    {"partial \\XA and more", "partial \\\\XA and more"},
  };

  for (const MalformedCase& aCase : aCases)
  {
    const TCollection_AsciiString aSource(aCase.Source);
    const TCollection_AsciiString aCleaned = StepData_StepWriter::CleanTextForSend(aSource);
    EXPECT_STREQ(aCleaned.ToCString(), aCase.Expected);
  }
}
// Checks that X2 / X4 sequences carrying hexadecimal payloads are returned unchanged.
TEST(StepData_StepWriterTest, CleanTextForSend_HexSequenceDetection)
{
  // Every entry must be returned exactly as given.
  const char* const anUnchanged[] = {
    "\\X2\\03B103B203B3\\X0\\",   // several code units inside an X2 sequence
    "\\X4\\001F600001F638\\X0\\", // hex payload inside an X4 sequence
    "start \\X2\\03C0\\X0\\ end", // plain text on both sides of a sequence
  };

  for (const char* aText : anUnchanged)
  {
    const TCollection_AsciiString aSource(aText);
    const TCollection_AsciiString aCleaned = StepData_StepWriter::CleanTextForSend(aSource);
    EXPECT_STREQ(aCleaned.ToCString(), aText);
  }
}

View File

@ -828,122 +828,67 @@ void StepData_StepWriter::Send(const Standard_Real val)
// Sends a text value as a parameter: special characters are escaped, the text is
// enclosed in single quotes and then added to the current line, moving to a new
// line or cutting the text into several lines when it does not fit.
void StepData_StepWriter::Send(const TCollection_AsciiString& val)
{
AddParam();
TCollection_AsciiString aval(val); // work on a copy so that it can be modified if needed
Standard_Integer nb = aval.Length();
Standard_Integer nn = nb;
aval.AssignCat('\''); // closing quote appended first, so that Insert(i+1) is always valid
// Use helper function to clean text while preserving control directives
TCollection_AsciiString aVal = CleanTextForSend(val);
Standard_Integer aNn = aVal.Length();
// Add surrounding quotes
aVal.Insert(1, '\'');
aVal.AssignCat('\'');
aNn += 2;
// Conversion of special characters
// (walks backwards so that insertions do not shift the positions still to be visited)
for (Standard_Integer i = nb; i > 0; i--)
{
char uncar = aval.Value(i);
if (uncar == '\'')
{
aval.Insert(i + 1, '\'');
nn++;
continue;
}
if (uncar == '\\')
{
aval.Insert(i + 1, '\\');
nn++;
continue;
}
if (uncar == '\n')
{
aval.SetValue(i, '\\');
aval.Insert(i + 1, '\\');
aval.Insert(i + 1, 'N');
nn += 2;
continue;
}
if (uncar == '\t')
{
aval.SetValue(i, '\\');
aval.Insert(i + 1, '\\');
aval.Insert(i + 1, 'T');
nn += 2;
continue;
}
}
//: i2 abv 31 Aug 98: ProSTEP TR9: avoid wrapping text or do it at spaces
aval.Insert(1, '\'');
nn += 2;
//: i2 AddString ("\'",1); nn ++;
// Beware of exceeding the 72-character line limit
if (thecurr.CanGet(nn))
AddString(aval, 0);
if (thecurr.CanGet(aNn))
AddString(aVal, 0);
//: i2
else
{
// The text does not fit on the current line: flush that line and start a new one
thefile->Append(thecurr.Moved());
Standard_Integer indst = thelevel * 2;
Standard_Integer anIndst = thelevel * 2;
if (theindent)
indst += theindval;
if (indst + nn <= StepLong)
thecurr.SetInitial(indst);
anIndst += theindval;
// Keep the indentation only if the whole text still fits within StepLong with it
if (anIndst + aNn <= StepLong)
thecurr.SetInitial(anIndst);
else
thecurr.SetInitial(0);
if (thecurr.CanGet(nn))
AddString(aval, 0);
if (thecurr.CanGet(aNn))
AddString(aVal, 0);
else
{
// Still too long: cut the text into pieces of at most StepLong characters
while (nn > 0)
while (aNn > 0)
{
if (nn <= StepLong)
if (aNn <= StepLong)
{
thecurr.Add(aval); // done, everything has been consumed
thecurr.Add(aVal); // done, everything has been consumed
thecurr.FreezeInitial();
break;
}
// Choose where to cut, searching backwards from StepLong: preferably at a space,
// otherwise at a backslash, otherwise at an underscore, otherwise exactly at StepLong.
// NOTE(review): a cut chosen at a backslash may fall inside a control directive or
// between the two characters of a doubled backslash - confirm readers tolerate this.
Standard_Integer stop = StepLong; // position of last separator
for (; stop > 0 && aval.Value(stop) != ' '; stop--)
Standard_Integer aStop = StepLong; // position of last separator
for (; aStop > 0 && aVal.Value(aStop) != ' '; aStop--)
;
if (!stop)
if (!aStop)
{
stop = StepLong;
for (; stop > 0 && aval.Value(stop) != '\\'; stop--)
aStop = StepLong;
for (; aStop > 0 && aVal.Value(aStop) != '\\'; aStop--)
;
if (!stop)
if (!aStop)
{
stop = StepLong;
for (; stop > 0 && aval.Value(stop) != '_'; stop--)
aStop = StepLong;
for (; aStop > 0 && aVal.Value(aStop) != '_'; aStop--)
;
if (!stop)
stop = StepLong;
if (!aStop)
aStop = StepLong;
}
}
// The head (up to the cut position) becomes a finished line, the tail is processed next
TCollection_AsciiString bval = aval.Split(stop);
thefile->Append(new TCollection_HAsciiString(aval));
aval = bval;
nn -= stop;
TCollection_AsciiString aBval = aVal.Split(aStop);
thefile->Append(new TCollection_HAsciiString(aVal));
aVal = aBval;
aNn -= aStop;
}
}
}
/* //:i2
else {
// The text must be cut up ... lines are limited to 72 characters (StepLong)
Standard_Integer ncurr = thecurr.Length();
Standard_Integer nbuff = StepLong - ncurr;
thecurr.Add (aval.ToCString(),nbuff);
thefile->Append(thecurr.Moved());
aval.Remove(1,nbuff);
nn -= nbuff;
while (nn > 0) {
if (nn <= StepLong) {
thecurr.Add (aval); // done, everything has been consumed
thecurr.FreezeInitial();
break;
}
TCollection_AsciiString bval = aval.Split(StepLong);
thefile->Append(new TCollection_HAsciiString(bval));
nn -= StepLong;
}
}
//:i2 */
// thecurr.Add('\''); already placed in aval at the beginning
}
//=================================================================================================
@ -1214,3 +1159,124 @@ Standard_Boolean StepData_StepWriter::Print(Standard_OStream& S)
return isGood;
}
//=================================================================================================
TCollection_AsciiString StepData_StepWriter::CleanTextForSend(
const TCollection_AsciiString& theText)
{
TCollection_AsciiString aResult;
const Standard_Integer aNb = theText.Length();
// Process characters from beginning to end
for (Standard_Integer anI = 1; anI <= aNb; anI++)
{
const char anUncar = theText.Value(anI);
// Check if we're at the start of a control directive
Standard_Boolean anIsDirective = Standard_False;
Standard_Integer aDirectiveLength = 0;
if (anUncar == '\\' && anI <= aNb)
{
// Check for \X2\ and \X4\ patterns first (need exactly 4 characters: \X2\)
if (anI + 3 <= aNb && theText.Value(anI + 1) == 'X' && theText.Value(anI + 3) == '\\')
{
const char aThirdChar = theText.Value(anI + 2);
// \X2, \X4, \X0 patterns - special control sequences
if (aThirdChar == '2' || aThirdChar == '4' || aThirdChar == '0')
{
anIsDirective = Standard_True;
aDirectiveLength = 4; // Basic directive length: \X2\, \X4\, \X0\
// For \X2 and \X4, find the terminating \X0 sequence
if (aThirdChar == '2' || aThirdChar == '4')
{
Standard_Integer aJ = anI + 4;
while (aJ <= aNb - 3)
{
if (theText.Value(aJ) == '\\' && theText.Value(aJ + 1) == 'X'
&& theText.Value(aJ + 2) == '0' && theText.Value(aJ + 3) == '\\')
{
aDirectiveLength = (aJ + 4) - anI; // Include the \X0 sequence
break;
}
aJ++;
}
}
}
}
// Check for \X{HH}\ pattern (need exactly 5 characters: \X{HH}\)
else if (anI + 4 <= aNb && theText.Value(anI + 1) == 'X' && theText.Value(anI + 4) == '\\')
{
const char aThirdChar = theText.Value(anI + 2);
const char aFourthChar = theText.Value(anI + 3);
// Regular \X{HH}\ pattern - check for two hex characters
if (std::isxdigit(aThirdChar) && std::isxdigit(aFourthChar))
{
anIsDirective = Standard_True;
aDirectiveLength = 5; // Control directive with two hex chars
}
}
// Check for \S, \N, \T patterns (need exactly 3 characters: \S\)
else if (anI + 2 <= aNb && theText.Value(anI + 2) == '\\')
{
const char aSecondChar = theText.Value(anI + 1);
if (aSecondChar == 'S' || aSecondChar == 'N' || aSecondChar == 'T')
{
anIsDirective = Standard_True;
aDirectiveLength = 3; // Simple directive pattern
}
}
// Check for \P{char}\ patterns (need exactly 4 characters: \P{char}\)
else if (anI + 3 <= aNb && theText.Value(anI + 1) == 'P' && theText.Value(anI + 3) == '\\')
{
const char aSecondChar = theText.Value(anI + 2);
if (std::isalpha(aSecondChar))
{
anIsDirective = Standard_True;
aDirectiveLength = 4; // P directive with parameter
}
}
}
if (anIsDirective)
{
// Copy the entire directive as-is
for (Standard_Integer aJ = 0; aJ < aDirectiveLength; aJ++)
{
aResult += theText.Value(anI + aJ);
}
anI += aDirectiveLength - 1; // Move past directive (loop will increment by 1)
}
else
{
// Process non-directive characters
if (anUncar == '\'')
{
aResult += "''"; // Double the quote
}
else if (anUncar == '\\')
{
aResult += "\\\\"; // Double the backslash
}
else if (anUncar == '\n')
{
aResult += "\\N\\"; // Convert to directive
}
else if (anUncar == '\t')
{
aResult += "\\T\\"; // Convert to directive
}
else
{
aResult += anUncar; // Copy as-is
}
}
}
return aResult;
}

View File

@ -267,6 +267,37 @@ public:
//! then clears it
Standard_EXPORT Standard_Boolean Print(Standard_OStream& S);
//! Static helper function to prepare text for STEP file output while preserving
//! existing ISO 10303-21 control directives.
//!
//! This function processes input text and escapes special characters (quotes, backslashes,
//! newlines, tabs) for STEP file format compliance, while carefully preserving any existing
//! control directives that may already be present in the input string.
//! The returned text does not include the enclosing quotes of a STEP string.
//!
//! Supported control directive patterns that are preserved:
//! - \X{HH}\ : Single byte character encoding (U+0000 to U+00FF)
//! - \X2\{HHHH}...\X0\ : UTF-16 character encoding
//! - \X4\{HHHHHHHH}...\X0\ : UTF-32 character encoding
//! - \S\ : Latin codepoint character with current code page
//! - \P{A-I}\ : Code page control directive
//! - \N\ : Newline directive (preserved as-is)
//! - \T\ : Tab directive (preserved as-is)
//!
//! Character escaping performed (only on non-directive content):
//! - Single quote (') -> two single quotes ('')
//! - Backslash (\) -> double backslash (\\)
//! - Newline character -> \N\ directive
//! - Tab character -> \T\ directive
//!
//! A backslash sequence that does not complete one of the patterns above
//! (for example "\X" or "\XA" without the closing backslash) is treated as
//! ordinary text, so its backslash is doubled.
//!
//! Example:
//! Input: "text with \XA7\ and 'quotes'"
//! Output: "text with \XA7\ and ''quotes''"
//!
//! @param theText The input text string to be processed
//! @return Processed text with preserved control directives and escaped special characters
Standard_EXPORT static TCollection_AsciiString CleanTextForSend(
const TCollection_AsciiString& theText);
protected:
private:
//! adds a string to current line; first flushes it if full