mirror of
https://git.dev.opencascade.org/repos/occt.git
synced 2025-04-04 18:06:22 +03:00
0031340: LDOM fails to read XML file starting with BOM
Added support of BOM (byte order mark) characters at the start of an XML stream or file and provided information about found BOM in LDOMParser.
This commit is contained in:
parent
b3df3dcd82
commit
8f34d47e21
@ -106,6 +106,18 @@ const TCollection_AsciiString& LDOMParser::GetError
|
||||
return myError;
|
||||
}
|
||||
|
||||
//=======================================================================
|
||||
//function : GetBOM
|
||||
//purpose : Returns the byte order mark defined at the start of a stream
|
||||
//=======================================================================
|
||||
|
||||
LDOM_OSStream::BOMType LDOMParser::GetBOM() const
{
  // Without a reader there is nothing to query: report an undefined BOM.
  if (!myReader)
    return LDOM_OSStream::BOM_UNDEFINED;

  // Otherwise forward the value recorded by the reader.
  return myReader->GetBOM();
}
|
||||
|
||||
//=======================================================================
|
||||
//function : parse
|
||||
//purpose :
|
||||
|
@ -64,6 +64,9 @@ class LDOMParser
|
||||
GetError (TCollection_AsciiString& aData) const;
|
||||
// Return text describing a parsing error, or Empty if no error occurred
|
||||
|
||||
// Returns the byte order mark defined at the start of a stream
|
||||
Standard_EXPORT LDOM_OSStream::BOMType GetBOM() const;
|
||||
|
||||
protected:
|
||||
// ---------- PROTECTED METHODS ----------
|
||||
|
||||
|
@ -112,6 +112,23 @@ public:
|
||||
|
||||
private:
|
||||
LDOM_SBuffer myBuffer;
|
||||
|
||||
public:
|
||||
// byte order mark defined at the start of a stream
|
||||
enum BOMType {
|
||||
BOM_UNDEFINED, // no BOM recognized (value the reader starts with)
|
||||
BOM_UTF8,      // bytes EF BB BF
|
||||
BOM_UTF16BE,   // bytes FE FF
|
||||
BOM_UTF16LE,   // bytes FF FE (not followed by 00 00)
|
||||
BOM_UTF32BE,   // bytes 00 00 FE FF
|
||||
BOM_UTF32LE,   // bytes FF FE 00 00
|
||||
BOM_UTF7,      // bytes 2B 2F 76 followed by one of 2B, 2F, 38, 39
|
||||
BOM_UTF1,      // bytes F7 64 4C
|
||||
BOM_UTFEBCDIC, // bytes DD 73 66 73
|
||||
BOM_SCSU,      // bytes 0E FE FF
|
||||
BOM_BOCU1,     // bytes FB EE 28
|
||||
BOM_GB18030    // bytes 84 31 95 33
|
||||
};
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -73,7 +73,8 @@ LDOM_XmlReader::LDOM_XmlReader (
|
||||
myLastChild(NULL),
|
||||
myPtr (&myBuffer[0]),
|
||||
myEndPtr (&myBuffer[0]),
|
||||
myTagPerStep (theTagPerStep)
|
||||
myTagPerStep (theTagPerStep),
|
||||
myBOM (LDOM_OSStream::BOM_UNDEFINED)
|
||||
{
|
||||
}
|
||||
|
||||
@ -92,6 +93,7 @@ LDOM_XmlReader::RecordType LDOM_XmlReader::ReadRecord (Standard_IStream& theIStr
|
||||
LDOMBasicString anAttrName, anAttrValue;
|
||||
char anAttDelimiter = '\0';
|
||||
Standard_Boolean aHasRead = Standard_False;
|
||||
Standard_Boolean isFileStart = !myEOF && theIStream.tellg() == std::iostream::pos_type(0);
|
||||
|
||||
for(;;) {
|
||||
// Check if the current file buffer is exhausted
|
||||
@ -153,6 +155,98 @@ LDOM_XmlReader::RecordType LDOM_XmlReader::ReadRecord (Standard_IStream& theIStr
|
||||
myBuffer[aBytesRest + aNBytes] = '\0';
|
||||
}
|
||||
}
|
||||
if (isFileStart)
|
||||
{
|
||||
isFileStart = Standard_False;
|
||||
// check for BOM block
|
||||
Standard_Utf8UChar aFirstChar = Standard_Utf8UChar(myPtr[0]);
|
||||
switch(aFirstChar) {
|
||||
case 0xEF:
|
||||
if (Standard_Utf8UChar(myPtr[1]) == 0xBB && Standard_Utf8UChar(myPtr[2]) == 0xBF)
|
||||
{
|
||||
myBOM = LDOM_OSStream::BOM_UTF8;
|
||||
myPtr += 3;
|
||||
}
|
||||
break;
|
||||
case 0xFE:
|
||||
if (Standard_Utf8UChar(myPtr[1]) == 0xFF)
|
||||
{
|
||||
myBOM = LDOM_OSStream::BOM_UTF16BE;
|
||||
myPtr += 2;
|
||||
}
|
||||
break;
|
||||
case 0xFF:
|
||||
if (Standard_Utf8UChar(myPtr[1]) == 0xFE)
|
||||
{
|
||||
if (myPtr[2] == 0 && myPtr[3] == 0)
|
||||
{
|
||||
myBOM = LDOM_OSStream::BOM_UTF32LE;
|
||||
myPtr += 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
myBOM = LDOM_OSStream::BOM_UTF16LE;
|
||||
myPtr += 2;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 0x00:
|
||||
if (myPtr[1] == 0 && Standard_Utf8UChar(myPtr[2]) == 0xFE && Standard_Utf8UChar(myPtr[3]) == 0xFF)
|
||||
{
|
||||
myBOM = LDOM_OSStream::BOM_UTF32BE;
|
||||
myPtr += 4;
|
||||
}
|
||||
break;
|
||||
case 0x2B:
|
||||
if (myPtr[1] == 47 && myPtr[2] == 118 &&
|
||||
(myPtr[3] == 43 || myPtr[3] == 47 || myPtr[3] == 56 || myPtr[3] == 57))
|
||||
{
|
||||
myBOM = LDOM_OSStream::BOM_UTF7;
|
||||
// The five-byte UTF-7 BOM is "+/v8-": the '-' (45) follows the '8' (56), so it sits at index 4
if (myPtr[3] == 56 && myPtr[4] == 45)
|
||||
myPtr += 5;
|
||||
else
|
||||
myPtr += 4;
|
||||
}
|
||||
break;
|
||||
case 0xF7:
|
||||
if (myPtr[1] == 100 && myPtr[2] == 76)
|
||||
{
|
||||
myBOM = LDOM_OSStream::BOM_UTF1;
|
||||
myPtr += 3;
|
||||
}
|
||||
break;
|
||||
case 0xDD:
|
||||
if (myPtr[1] == 115 && myPtr[2] == 102 && myPtr[3] == 115)
|
||||
{
|
||||
myBOM = LDOM_OSStream::BOM_UTFEBCDIC;
|
||||
myPtr += 4;
|
||||
}
|
||||
break;
|
||||
case 0x0E:
|
||||
if (Standard_Utf8UChar(myPtr[1]) == 0xFE && Standard_Utf8UChar(myPtr[2]) == 0xFF)
|
||||
{
|
||||
myBOM = LDOM_OSStream::BOM_SCSU;
|
||||
myPtr += 3;
|
||||
}
|
||||
break;
|
||||
case 0xFB:
|
||||
if (Standard_Utf8UChar(myPtr[1]) == 0xEE && myPtr[2] == 40)
|
||||
{
|
||||
myBOM = LDOM_OSStream::BOM_BOCU1;
|
||||
myPtr += 3;
|
||||
}
|
||||
break;
|
||||
case 0x84:
|
||||
if (myPtr[1] == 49 && Standard_Utf8UChar(myPtr[2]) == 0x95 && myPtr[3] == 51)
|
||||
{
|
||||
myBOM = LDOM_OSStream::BOM_GB18030;
|
||||
myPtr += 4;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (myBOM != LDOM_OSStream::BOM_UNDEFINED)
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check the character data
|
||||
switch (aState) {
|
||||
|
@ -23,9 +23,9 @@
|
||||
#define XML_BUFFER_SIZE 20480
|
||||
|
||||
#include <LDOM_BasicElement.hxx>
|
||||
#include <LDOM_OSStream.hxx>
|
||||
|
||||
class TCollection_AsciiString;
|
||||
class LDOM_OSStream;
|
||||
|
||||
// Class LDOM_XmlReader
|
||||
//
|
||||
@ -66,6 +66,9 @@ class LDOM_XmlReader
|
||||
const char * theEnd);
|
||||
// try convert string theStart to LDOM_AsciiInteger, return False on success
|
||||
|
||||
// Returns the byte order mark defined at the start of a stream
|
||||
LDOM_OSStream::BOMType GetBOM() const { return myBOM; }
|
||||
|
||||
private:
|
||||
// ---------- PRIVATE (PROHIBITED) METHODS ----------
|
||||
LDOM_XmlReader (const LDOM_XmlReader& theOther);
|
||||
@ -86,6 +89,7 @@ class LDOM_XmlReader
|
||||
const char * myEndPtr;
|
||||
char myBuffer [XML_BUFFER_SIZE+4];
|
||||
Standard_Boolean myTagPerStep;
|
||||
LDOM_OSStream::BOMType myBOM;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -674,10 +674,6 @@ static Standard_Integer OCC983 (Draw_Interpretor& di, Standard_Integer argc, co
|
||||
di << " AttributeValue = " << itemValue.ToCString() << "\n";
|
||||
}
|
||||
|
||||
// LDOM_Element element;
|
||||
// for ( element = (const LDOM_Element&) root.getFirstChild();
|
||||
// !element.isNull();
|
||||
// element = (const LDOM_Element&) element.getNextSibling() ) {
|
||||
LDOM_Element element;
|
||||
LDOM_Node node;
|
||||
for ( node = root.getFirstChild(), element = (const LDOM_Element&) node;
|
||||
@ -694,6 +690,25 @@ static Standard_Integer OCC983 (Draw_Interpretor& di, Standard_Integer argc, co
|
||||
di << " AttributeValue = " << itemValue2.ToCString() << "\n";
|
||||
}
|
||||
}
|
||||
if (aParser.GetBOM() != LDOM_OSStream::BOM_UNDEFINED)
|
||||
{
|
||||
di << "BOM is ";
|
||||
switch (aParser.GetBOM()) {
|
||||
case LDOM_OSStream::BOM_UTF8: di << "UTF-8"; break;
|
||||
case LDOM_OSStream::BOM_UTF16BE: di << "UTF-16 (BE)"; break;
|
||||
case LDOM_OSStream::BOM_UTF16LE: di << "UTF-16 (LE)"; break;
|
||||
case LDOM_OSStream::BOM_UTF32BE: di << "UTF-32 (BE)"; break;
|
||||
case LDOM_OSStream::BOM_UTF32LE: di << "UTF-32 (LE)"; break;
|
||||
case LDOM_OSStream::BOM_UTF7: di << "UTF-7"; break;
|
||||
case LDOM_OSStream::BOM_UTF1: di << "UTF-1"; break;
|
||||
case LDOM_OSStream::BOM_UTFEBCDIC: di << "UTF-EBCDIC"; break;
|
||||
case LDOM_OSStream::BOM_SCSU: di << "SCSU"; break;
|
||||
case LDOM_OSStream::BOM_BOCU1: di << "BOCU-1"; break;
|
||||
case LDOM_OSStream::BOM_GB18030: di << "GB-18030"; break;
|
||||
default: di << "unexpected";
|
||||
}
|
||||
di << "\n";
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
18
tests/bugs/fclasses/bug31340
Normal file
18
tests/bugs/fclasses/bug31340
Normal file
@ -0,0 +1,18 @@
|
||||
# Regression test for issue 0031340: an XML file that starts with a UTF-8 BOM
# must be parsed by LDOM and the BOM must be reported.
puts "================"
|
||||
puts "0031340: LDOM fails to read XML file starting with BOM"
|
||||
puts "================"
|
||||
puts ""
|
||||
|
||||
# NOTE(review): presumably QAcommands is the plugin that provides OCC983 - confirm
pload QAcommands
|
||||
|
||||
# Run OCC983 on the test data file and capture everything it prints
set list [OCC983 [locate_data_file bug31340.xml]]
|
||||
|
||||
# The output must contain "Document parsed" ...
if { [regexp "Document parsed" $list] == 1 } {
|
||||
# ... and, treated as a Tcl list, an element equal to "UTF-8"
if {[lsearch -exact ${list} "UTF-8"] != -1 } {
|
||||
puts "OK"
|
||||
} else {
|
||||
puts "Error : BOM was not found in $list"
|
||||
}
|
||||
} else {
|
||||
puts "Error : document not parsed"
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user