Uploaded image for project: 'Xerces-C++'
  1. Xerces-C++
  2. XERCESC-2030

failed to do validation when there's Japanese words in the xml file

    XMLWordPrintableJSON

Details

    • Bug
    • Status: Resolved
    • Major
    • Resolution: Not A Problem
    • None
    • None
    • SAX/SAX2
    • None
    • SunOS 5.10 Generic_139555-08 sun4u sparc SUNW,Sun-Fire-V245
      xerces C++ 3.1.1

    Description

      Hi owners,

      I have a problem using Xerces-C++ 3.1.1 for schema validation when the XML file contains Japanese text: the parser raises "FatalError: invalid multi-byte sequence" and stops validating.
      Environment: Linux
      Locale:
      LANG=
      LC_CTYPE=en_GB.ISO8859-1
      LC_NUMERIC=C
      LC_TIME=en_GB.ISO8859-1
      LC_COLLATE=en_GB.ISO8859-1
      LC_MONETARY=en_GB.ISO8859-1
      LC_MESSAGES=C
      LC_ALL=

      The XML file is generated on Linux and, for business reasons, we cannot change the system character set from ISO8859-1 to UTF-8. Is there a workaround to skip this kind of error, or is it possible to change the character set in C++ so that validation passes?
      All the source code is attached below; please let me know if you need any more information.
      Looking forward to your reply and thank you so much in advance.

      Source Code:
      a.xsd:
      ============================================================
      <?xml version="1.0" encoding="UTF-8"?>
      <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
      <xs:element name="phonebook">
      <xs:complexType>
      <xs:sequence>
      <xs:element name="name" minOccurs="1" maxOccurs="1">
      <xs:complexType>
      <xs:sequence>
      <xs:element name="first" type="xs:string"/>
      </xs:sequence>
      </xs:complexType>
      </xs:element>
      </xs:sequence>
      </xs:complexType>
      </xs:element>
      </xs:schema>

      a.xml:
      ============================================================
      <?xml version="1.0" encoding="UTF-8"?>
      <phonebook xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:noNamespaceSchemaLocation=
      "gobitan.xsd">
      <name>
      <first>円短期</first>
      </name>
      </phonebook>

      val.cpp
      ============================================================
      #include <xercesc/util/PlatformUtils.hpp>
      #include <xercesc/validators/common/Grammar.hpp>
      #include <xercesc/sax2/SAX2XMLReader.hpp>
      #include <xercesc/util/XMLException.hpp>
      #include <xercesc/util/OutOfMemoryException.hpp>
      #include <xercesc/util/XMLString.hpp>
      #include <xercesc/sax2/XMLReaderFactory.hpp>
      #include <stdio.h>
      #include "MyHandler.hpp"
      #if defined(XERCES_NEW_IOSTREAMS)
      #include <iostream>
      #else
      #include <iostream.h>
      #endif
      using namespace std;
      using namespace xercesc;
      //XERCES_CPP_NAMESPACE_USE

      int main( int argc , char** argv )
      {
      XMLPlatformUtils::Initialize(); //.....
      SAX2XMLReader* parser = XMLReaderFactory::createXMLReader();

      parser->setFeature(XMLUni::fgSAX2CoreNameSpaces, true);
      parser->setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true);
      parser->setFeature(XMLUni::fgXercesValidationErrorAsFatal, true);
      parser->setFeature(XMLUni::fgSAX2CoreValidation, true);
      parser->setFeature(XMLUni::fgXercesSchema, true);
      parser->setFeature(XMLUni::fgXercesSchemaFullChecking, true);
      parser->setFeature(XMLUni::fgXercesLoadSchema,true);
      parser->setExitOnFirstFatalError(false);
      parser->loadGrammar ("a.xsd", Grammar::SchemaGrammarType, true);

      MyHandler* handler=new MyHandler();
      parser->setContentHandler(handler);
      parser->setErrorHandler(handler);
      try

      { parser->parse("a.xml"); vector<string> errs=handler->getSchemaErrorContent(); if(errs.size()>0) { cout<<"ERROR MESSAGE OF SCHEMA VALIDATION============="<<endl; for (unsigned int i = 0; i < errs.size();i++) { cout<<errs.at(i)<<endl; } } cout<<"END TRY"<<endl; }

      catch (const XMLException& toCatch)

      { char* message = XMLString::transcode(toCatch.getMessage()); cout << "Exception message is: \n" << message << "\n"; XMLString::release(&message); return -1; }

      catch (const SAXParseException& toCatch)

      { char* message = XMLString::transcode(toCatch.getMessage()); cout << "Exception message is: \n" << message << "\n"; XMLString::release(&message); return -1; }

      catch (...)

      { cout << "Unexpected Exception \n" ; return -1; }

      cout<<"FINISH"<<endl;
      XMLPlatformUtils::Terminate();

      return 0;
      }

      MyHandler.cpp
      ============================================================
      #include "MyHandler.hpp"
      #include <xercesc/sax2/Attributes.hpp>
      #include <xercesc/sax/SAXParseException.hpp>
      #include <xercesc/sax/SAXException.hpp>
      #if defined(XERCES_NEW_IOSTREAMS)
      #include <iostream>
      #else
      #include <iostream.h>
      #endif

      // ---------------------------------------------------------------------------
      // MyHandler: Constructors and Destructor
      // ---------------------------------------------------------------------------
      // Starts with every counter at zero, both error flags cleared and all
      // text members empty.
      MyHandler::MyHandler()
          : fAttrCount(0)
          , fCharacterCount(0)
          , fElementCount(0)
          , fSpaceCount(0)
          , fSchemaErrors(false)
          , fSystemException(false)
          , eleName("")
          , eleValue("")
          , curElement()
          , curValue()
          , buf("")
      {
      }

      MyHandler::~MyHandler()

      { } // --------------------------------------------------------------------------- // MyHandler: Implementation of the SAX DocumentHandler interface // --------------------------------------------------------------------------- void MyHandler::startElement(const XMLCh* const uri , const XMLCh* const localname , const XMLCh* const qname , const Attributes& attrs) { curValue = ""; curElement=""; curElement=XMLString::transcode(localname); elementList.push_back(curElement); fElementCount++; fAttrCount += attrs.getLength(); } void MyHandler::endElement( const XMLCh* const uri , const XMLCh* const localname , const XMLCh* const qname) { curElement = XMLString::transcode(localname); elementList.remove(curElement); }

      void MyHandler::characters( const XMLCh* const chars
      , const XMLSize_t length)

      { fCharacterCount += length; curValue = StrUtil(chars); } void MyHandler::ignorableWhitespace( const XMLCh* const /* chars */ , const XMLSize_t length) { fSpaceCount += length; } void MyHandler::startDocument() { fAttrCount = 0; fCharacterCount = 0; fElementCount = 0; fSpaceCount = 0; eleName=""; eleValue=""; curElement=""; curValue=""; elementList.clear(); cout<<"Start to Parse File*****"<<endl; } void MyHandler::endDocument() { cout<<"Finish Parse File*****"<<endl; } // --------------------------------------------------------------------------- // MyHandler: Overrides of the SAX ErrorHandler interface // --------------------------------------------------------------------------- void MyHandler::error(const SAXParseException& e) { string tmp; string message = StrUtil(e.getMessage()); tmp.append( "Error: " +message); tmp.append( " curElement = [" + curElement + "] element, curValue = ["+ curValue+ "]."); vSchemaErrorContent.push_back(tmp); cout<<"ERROR======================== msg = ["<<tmp<<"]."<<endl; }

      void MyHandler::fatalError(const SAXParseException& e)
      {
      fSchemaErrors = true;
      char* message = XMLString::transcode(e.getMessage());
      cout << "Fatal Error: " << message << " at line: " << e.getLineNumber()<< endl;
      cout<<"FATAL ERROR============================ msg = ["<<message<<"]."<<endl;
      XMLString::release(&message);
      }

      // Prints a parser warning with the line number it was reported at.
      void MyHandler::warning(const SAXParseException& e)
      {
          char* text = XMLString::transcode(e.getMessage());
          cout << "Warning : " << text;
          cout << " at line: " << e.getLineNumber() << endl;
          XMLString::release(&text);
      }

      // Forgets every recorded error: empties both message lists and clears both
      // error flags.
      void MyHandler::resetErrors()
      {
          vSystemErrorContent.clear();
          vSchemaErrorContent.clear();
          fSystemException = fSchemaErrors = false;
      }

      MyHandler.hpp
      ============================================================
      #include <xercesc/sax2/Attributes.hpp>
      #include <xercesc/sax2/DefaultHandler.hpp>
      #include <string>
      #include <vector>
      #include <list>
      #include <sstream>
      using namespace std;
      XERCES_CPP_NAMESPACE_USE
      // SAX2 content and error handler that counts elements, attributes,
      // characters and ignorable whitespace, and collects error messages
      // reported by the parser.
      class MyHandler : public DefaultHandler
      {
      public:
          // -----------------------------------------------------------------------
          // Constructors and Destructor
          // -----------------------------------------------------------------------
          MyHandler();
          ~MyHandler();

          // -----------------------------------------------------------------------
          // Getter methods
          // -----------------------------------------------------------------------
          XMLSize_t getElementCount() const
          { return fElementCount; }

          XMLSize_t getAttrCount() const
          { return fAttrCount; }

          XMLSize_t getCharacterCount() const
          { return fCharacterCount; }

          XMLSize_t getSpaceCount() const
          { return fSpaceCount; }

          bool hasfSchemaErrors() const
          { return fSchemaErrors; }

          bool hasfSystemException() const
          { return fSystemException; }

          // Returns a copy of the collected schema error messages.
          vector<string> getSchemaErrorContent() const
          { return vSchemaErrorContent; }

          // Returns a copy of the collected system error messages.
          vector<string> getSystemErrorContent() const
          { return vSystemErrorContent; }

          // -----------------------------------------------------------------------
          // SAX2 content handler callbacks
          // -----------------------------------------------------------------------
          void startElement(const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname, const Attributes& attrs);
          void endElement(const XMLCh* const uri,const XMLCh* const localname,const XMLCh* const qname ) ;

          void characters(const XMLCh* const chars, const XMLSize_t length);
          void ignorableWhitespace(const XMLCh* const chars, const XMLSize_t length);
          void startDocument();
          void endDocument();

          // -----------------------------------------------------------------------
          // SAX error handler callbacks
          // -----------------------------------------------------------------------
          void warning(const SAXParseException& exc);
          void error(const SAXParseException& exc);
          void fatalError(const SAXParseException& exc);
          void resetErrors();

      private:
          XMLSize_t fAttrCount;
          XMLSize_t fCharacterCount;
          XMLSize_t fElementCount;
          XMLSize_t fSpaceCount;

          bool fSchemaErrors;
          bool fSystemException;
          vector<string> vSchemaErrorContent;
          vector<string> vSystemErrorContent;

          // eleName, eleValue and buf are initialised by the constructor, and the
          // first two are reset in startDocument(), so they must be declared here.
          // NOTE(review): declared as string because they are only ever given ""
          // - confirm the intended type of buf.
          string eleName;
          string eleValue;
          string curElement;
          string curValue;
          string buf;
          list<string> elementList;
      };

      Attachments

        Activity

          People

            Unassigned Unassigned
            ocean_helen ocean_helen
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: