Details
-
Bug
-
Status: Resolved
-
Major
-
Resolution: Not A Problem
-
None
-
None
-
None
-
SunOS 5.10 Generic_139555-08 sun4u sparc SUNW,Sun-Fire-V245
xerces C++ 3.1.1
Description
Hi owners,
I ran into a problem when using Xerces-C++ 3.1.1 to schema-validate an XML file that contains Japanese text. The parser raised "FatalError: invalid multi-byte sequence" and stopped validating.
Environment: Linux
Locale:
LANG=
LC_CTYPE=en_GB.ISO8859-1
LC_NUMERIC=C
LC_TIME=en_GB.ISO8859-1
LC_COLLATE=en_GB.ISO8859-1
LC_MONETARY=en_GB.ISO8859-1
LC_MESSAGES=C
LC_ALL=
The XML file is generated on Linux and, for business reasons, we cannot change the system character set from ISO8859-1 to UTF-8. Is there a workaround to skip this kind of error, or is it possible to change the character set from C++ so that validation passes?
All the source code is attached below; please let me know if you need any more information.
Looking forward to your reply and thank you so much in advance.
Source Code:
a.xsd:
============================================================
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="phonebook">
<xs:complexType>
<xs:sequence>
<xs:element name="name" minOccurs="1" maxOccurs="1">
<xs:complexType>
<xs:sequence>
<xs:element name="first" type="xs:string"/>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:schema>
a.xml:
============================================================
<?xml version="1.0" encoding="UTF-8"?>
<phonebook xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation=
"gobitan.xsd">
<name>
<first>円短期</first>
</name>
</phonebook>
val.cpp
============================================================
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/validators/common/Grammar.hpp>
#include <xercesc/sax2/SAX2XMLReader.hpp>
#include <xercesc/util/XMLException.hpp>
#include <xercesc/util/OutOfMemoryException.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/sax2/XMLReaderFactory.hpp>
#include <stdio.h>
#include "MyHandler.hpp"
#if defined(XERCES_NEW_IOSTREAMS)
#include <iostream>
#else
#include <iostream.h>
#endif
using namespace std;
using namespace xercesc;
//XERCES_CPP_NAMESPACE_USE
int main( int argc , char** argv )
{
XMLPlatformUtils::Initialize(); //.....
SAX2XMLReader* parser = XMLReaderFactory::createXMLReader();
parser->setFeature(XMLUni::fgSAX2CoreNameSpaces, true);
parser->setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true);
parser->setFeature(XMLUni::fgXercesValidationErrorAsFatal, true);
parser->setFeature(XMLUni::fgSAX2CoreValidation, true);
parser->setFeature(XMLUni::fgXercesSchema, true);
parser->setFeature(XMLUni::fgXercesSchemaFullChecking, true);
parser->setFeature(XMLUni::fgXercesLoadSchema,true);
parser->setExitOnFirstFatalError(false);
parser->loadGrammar ("a.xsd", Grammar::SchemaGrammarType, true);
MyHandler* handler=new MyHandler();
parser->setContentHandler(handler);
parser->setErrorHandler(handler);
try
catch (const XMLException& toCatch)
{ char* message = XMLString::transcode(toCatch.getMessage()); cout << "Exception message is: \n" << message << "\n"; XMLString::release(&message); return -1; }catch (const SAXParseException& toCatch)
{ char* message = XMLString::transcode(toCatch.getMessage()); cout << "Exception message is: \n" << message << "\n"; XMLString::release(&message); return -1; }catch (...)
{ cout << "Unexpected Exception \n" ; return -1; } cout<<"FINISH"<<endl;
XMLPlatformUtils::Terminate();
return 0;
}
MyHandler.cpp
============================================================
#include "MyHandler.hpp"
#include <xercesc/sax2/Attributes.hpp>
#include <xercesc/sax/SAXParseException.hpp>
#include <xercesc/sax/SAXException.hpp>
#if defined(XERCES_NEW_IOSTREAMS)
#include <iostream>
#else
#include <iostream.h>
#endif
// ---------------------------------------------------------------------------
// MyHandler: Constructors and Destructor
// ---------------------------------------------------------------------------
// Sets every counter to zero and both error flags to false.
// The initializers for eleName, eleValue and buf were dropped: the class
// declaration has no such members, so naming them here cannot compile.
// The remaining string members are explicitly initialized to empty strings.
MyHandler::MyHandler() :
    fAttrCount(0)
    , fCharacterCount(0)
    , fElementCount(0)
    , fSpaceCount(0)
    , fSchemaErrors(false)
    , fSystemException(false)
    , curElement("")
    , curValue("")
{
}
MyHandler::~MyHandler()
{ } // --------------------------------------------------------------------------- // MyHandler: Implementation of the SAX DocumentHandler interface // --------------------------------------------------------------------------- void MyHandler::startElement(const XMLCh* const uri , const XMLCh* const localname , const XMLCh* const qname , const Attributes& attrs) { curValue = ""; curElement=""; curElement=XMLString::transcode(localname); elementList.push_back(curElement); fElementCount++; fAttrCount += attrs.getLength(); } void MyHandler::endElement( const XMLCh* const uri , const XMLCh* const localname , const XMLCh* const qname) { curElement = XMLString::transcode(localname); elementList.remove(curElement); }void MyHandler::characters( const XMLCh* const chars
, const XMLSize_t length)
void MyHandler::fatalError(const SAXParseException& e)
{
fSchemaErrors = true;
char* message = XMLString::transcode(e.getMessage());
cout << "Fatal Error: " << message << " at line: " << e.getLineNumber()<< endl;
cout<<"FATAL ERROR============================ msg = ["<<message<<"]."<<endl;
XMLString::release(&message);
}
// Error-handler callback for parser warnings: prints the message and the
// line number on stdout. No error flag is touched.
void MyHandler::warning(const SAXParseException& e)
{
    char* text = XMLString::transcode(e.getMessage());

    cout << "Warning : " << text;
    cout << " at line: " << e.getLineNumber() << endl;

    XMLString::release(&text);
}
// Puts the handler back into its "no errors seen" state: empties both
// collected-error vectors and clears both error flags.
void MyHandler::resetErrors()
{
    vSystemErrorContent.clear();
    vSchemaErrorContent.clear();

    fSystemException = false;
    fSchemaErrors    = false;
}
MyHandler.hpp
============================================================
#include <xercesc/sax2/Attributes.hpp>
#include <xercesc/sax2/DefaultHandler.hpp>
#include <string>
#include <vector>
#include <list>
#include <sstream>
using namespace std;
XERCES_CPP_NAMESPACE_USE
// SAX2 content and error handler that counts elements, attributes and
// characters while a document is parsed and records whether errors were
// reported.
class MyHandler : public DefaultHandler
{
public:
    // -----------------------------------------------------------------------
    //  Constructors and Destructor
    // -----------------------------------------------------------------------
    MyHandler();
    ~MyHandler();

    // -----------------------------------------------------------------------
    //  Getter methods
    // -----------------------------------------------------------------------

    // Value of the element counter.
    // (The body of this getter was missing from the listing; restored.)
    XMLSize_t getElementCount() const
    {
        return fElementCount;
    }

    // Value of the attribute counter.
    XMLSize_t getAttrCount() const
    {
        return fAttrCount;
    }

    // Value of the character counter.
    XMLSize_t getCharacterCount() const
    {
        return fCharacterCount;
    }

    // Value of the whitespace counter.
    XMLSize_t getSpaceCount() const
    {
        return fSpaceCount;
    }

    // True once fatalError() has been called since the last resetErrors().
    bool hasfSchemaErrors() const
    {
        return fSchemaErrors;
    }

    // Current value of the system-exception flag.
    bool hasfSystemException() const
    {
        return fSystemException;
    }

    // Copy of the collected schema error messages.
    vector<string> getSchemaErrorContent() const
    {
        return vSchemaErrorContent;
    }

    // Copy of the collected system error messages.
    vector<string> getSystemErrorContent() const
    {
        return vSystemErrorContent;
    }

    // -----------------------------------------------------------------------
    //  Handlers for the SAX2 content-handler interface
    // -----------------------------------------------------------------------
    void startElement(const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname, const Attributes& attrs);
    void endElement(const XMLCh* const uri,const XMLCh* const localname,const XMLCh* const qname ) ;
    void characters(const XMLCh* const chars, const XMLSize_t length);
    void ignorableWhitespace(const XMLCh* const chars, const XMLSize_t length);
    void startDocument();
    void endDocument();

    // -----------------------------------------------------------------------
    //  Handlers for the SAX error-handler interface
    // -----------------------------------------------------------------------
    void warning(const SAXParseException& exc);
    void error(const SAXParseException& exc);
    void fatalError(const SAXParseException& exc);
    void resetErrors();

private:
    // Running counters.
    XMLSize_t fAttrCount;
    XMLSize_t fCharacterCount;
    XMLSize_t fElementCount;
    XMLSize_t fSpaceCount;

    // Error state; both flags are cleared by resetErrors().
    bool fSchemaErrors;
    bool fSystemException;
    vector<string> vSchemaErrorContent;
    vector<string> vSystemErrorContent;

    // Local name of the most recently started or ended element.
    string curElement;
    // Reset to the empty string at every startElement().
    string curValue;
    // Local names pushed by startElement() and removed by endElement().
    list<string> elementList;
};