Uploaded image for project: 'Xerces-C++'
  1. Xerces-C++
  2. XERCESC-1570

Problem with acute accent unicode Latin character

    XMLWordPrintableJSON

Details

    • Bug
    • Status: Resolved
    • Minor
    • Resolution: Invalid
    • 2.7.0
    • None
    • Miscellaneous
    • None
    • uname -a: Linux aries 2.6.15.4-Abysal #1 SMP Wed Feb 22 17:28:51 CET 2006 i686 GNU/Linux

    Description

      The small example program below demonstrates the problem:
      ---------------------------------------------------------------------------------------------------------------------
      #include <stdio.h>
      #include <locale.h>
      #include <xercesc/util/PlatformUtils.hpp>
      #include <xercesc/util/XMLString.hpp>
      #include <xercesc/dom/DOM.hpp>
      #include <xercesc/framework/MemBufInputSource.hpp>
      #include <xercesc/parsers/XercesDOMParser.hpp>
      #include <xercesc/framework/StdOutFormatTarget.hpp>

      // Null-terminated XMLCh strings spelled out character by character,
      // so no transcoding is involved in building them.
      const XMLCh pCore[] = { 'C', 'o', 'r', 'e', '\0' };                                  // feature name looked up in main()
      const XMLCh pRoot[] = { 'r', 'o', 'o', 't', '\0' };                                  // name of the element created in main()
      const XMLCh pVers[] = { '1', '.', '0', '\0' };                                       // value passed to setVersion()
      const XMLCh pEnc [] = { 'I', 'S', 'O', '-', '8', '8', '5', '9', '-', '1', '\0' };    // value passed to both setEncoding() calls
      const XMLCh pLS  [] = { 'L', 'S', '\0' };                                            // feature name looked up in printf_xml()
      const XMLCh pLF  [] = { '\n', '\0' };                                                // not referenced by the code shown here

      using namespace xercesc;

      void printf_xml( const DOMDocument *doc )
      {
      DOMImplementation *irl = DOMImplementationRegistry::getDOMImplementation( pLS );
      DOMWriter *theSerializer = ((DOMImplementationLS *)irl)->createDOMWriter();
      theSerializer->setEncoding( pEnc );
      XMLFormatTarget *xmlft = new StdOutFormatTarget();
      theSerializer->writeNode( xmlft, *doc );
      delete xmlft;
      delete theSerializer;
      }

      int main( int argc, char *argv[] )
      {
      XMLPlatformUtils::Initialize( "es_ES" );

      { setlocale( LC_ALL, "es_ES@euro" ); //setlocale( LC_ALL, "es_ES.iso885915@euro" ); // the same problem DOMImplementation *irc = DOMImplementationRegistry::getDOMImplementation( pCore ); DOMDocument *doc = irc->createDocument( 0, 0, 0 ); doc->setVersion( pVers ); doc->setEncoding( pEnc ); DOMElement *de = doc->createElement( pRoot ); static XMLCh *pValue = XMLString::transcode( "1.234´56" ); DOMText *dt = doc->createTextNode( pValue ); XMLString::release( &pValue ); de->appendChild( dt ); doc->appendChild( de ); printf_xml( doc ); doc->release(); }

      XMLPlatformUtils::Terminate();
      return 0;
      }
      ---------------------------------------------------------------------------------------------------------------------
      Pay close attention to the XMLString::transcode() call.
      The value "1.234´56" is passed with an ACUTE ACCENT (´, U+00B4) between the characters '4' and '5'.
      When the program is run, the result is as follows:
      <?xml version="1.0" encoding="ISO-8859-1" standalone="no" ?><root>1.234Ž56</root>
      Why does the Unicode character Ž appear instead of ´?
      Ž is "LATIN CAPITAL LETTER Z WITH CARON" (U+017D),
      not "ACUTE ACCENT" (U+00B4), which is the character that was passed in.

      Attachments

        Activity

          People

            Unassigned Unassigned
            jmfcya jose
            Votes:
            0 Vote for this issue
            Watchers:
            0 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: