Uploaded image for project: 'Apache Avro'
  1. Apache Avro
  2. AVRO-1176

ResolvingDecoder fails to resolve or parse schemas

    Export: XML | Word | Printable | JSON

Details

    • Type: Bug
    • Status: Resolved
    • Priority: Major
    • Resolution: Fixed
    • Affects Version/s: 1.7.0
    • Fix Version/s: 1.9.0
    • Component/s: c++

    Description

      We have encountered a number of problems using ResolvingDecoder in the C++ project that we can trace to the following issues:

      1. Incorrectly swapped reader/writer arguments passed to ResolvingGrammarGenerator::generate()
      2. Using the wrong tree in ResolvingGrammarGenerator::generate() to generate the backup parsing stack
      3. A decoder has no "hook" into the generated codec_traits decode methods (for Specific) that would advance the resolved parse tree through the Symbol::sSkipStart nodes, so extra or unknown fields in the writer's data are not ignored.
      4. A resolving decoder can generate a valid decoded object even when there are garbage characters at the end of the input stream, provided those characters appear in a field that the reader schema is unaware of.

      Reader/writer schemas that fail to parse properly are shown below. The first example is the writer adding a field to a record that is inside an array.

      Added field to record inside array
      {
          std::string readerString("{\"type\":\"record\",\"name\":\"Outer\",\"fields\":[{\"name\":\"outerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}}}]}");
          std::string writerString("{\"type\":\"record\",\"name\":\"Outer\",\"fields\":[{\"name\":\"outerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"third\",\"type\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}]}}}]}");
          std::stringstream readerStream(readerString);
          std::stringstream writerStream(writerString);
          
          avro::ValidSchema readerSchema;
          avro::ValidSchema writerSchema;
          
          avro::compileJsonSchema(readerStream, readerSchema);
          avro::compileJsonSchema(writerStream, writerSchema);
          
          avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, readerSchema, avro::jsonDecoder(writerSchema));
          struct Outer outer;
          
          std::stringstream jsonStream("{\"outerArray\":[{\"first\":{\"field\":\"here is a string field\"},\"second\":{\"field\":\"here is another string field\"},\"third\":{\"number\":3}},{\"first\":{\"field\":\"cool\"},\"second\":{\"field\":\"beans\"},\"third\":{\"number\":4}}]}");
          std::auto_ptr<avro::InputStream> input = avro::istreamInputStream(jsonStream);
      
          decoder->init(*input);
          avro::decode(*decoder, outer);
      }
      
      Additional array of writer-only record
      {
          std::string readerString("{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}}}]}");
          std::string writerString("{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"innerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}}]}}}]}");
          std::stringstream readerStream(readerString);
          std::stringstream writerStream(writerString);
          
          avro::ValidSchema readerSchema;
          avro::ValidSchema writerSchema;
          
          avro::compileJsonSchema(readerStream, readerSchema);
          avro::compileJsonSchema(writerStream, writerSchema);
          
          avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, readerSchema, avro::jsonDecoder(writerSchema));
          struct Outer outer;
          
          std::stringstream jsonStream("{\"extraArray\":[{\"first\":{\"field\":\"here is a string field\"},\"second\":{\"field\":\"here is another string field\"},\"innerArray\":[{\"number\":1},{\"number\":2},{\"number\":3}]},{\"first\":{\"field\":\"second item in array\"},\"second\":{\"field\":\"inner2 field of 2\"},\"innerArray\":[{\"number\":4},{\"number\":5}]},{\"first\":{\"field\":\"third item in array\"},\"second\":{\"field\":\"inner2 field of 3\"},\"innerArray\":[{\"number\":6}]}]}");
          std::auto_ptr<avro::InputStream> input = avro::istreamInputStream(jsonStream);
      
          decoder->init(*input);
          avro::decode(*decoder, outer);
      }
      
      Multiple nesting of unknown records
      {
          std::string readerString("{\"type\":\"record\",\"name\":\"CombinationExtra\",\"fields\":[{\"name\":\"outerAsField\",\"type\":{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}}}]}}]}");
          std::string writerString("{\"type\":\"record\",\"name\":\"CombinationExtra\",\"fields\":[{\"name\":\"outerAsField\",\"type\":{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"innerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}}]}}}]}}]}");
          std::stringstream readerStream(readerString);
          std::stringstream writerStream(writerString);
          
          avro::ValidSchema readerSchema;
          avro::ValidSchema writerSchema;
          
          avro::compileJsonSchema(readerStream, readerSchema);
          avro::compileJsonSchema(writerStream, writerSchema);
          
          avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, readerSchema, avro::jsonDecoder(writerSchema));
          struct Outer outer;
          
          std::stringstream jsonStream("{\"outerAsField\":{\"extraArray\":[{\"first\":{\"field\":\"here is a string field\"},\"second\":{\"field\":\"here is another string field\"},\"innerArray\":[{\"number\":1},{\"number\":2},{\"number\":3}]}]}}");
          std::auto_ptr<avro::InputStream> input = avro::istreamInputStream(jsonStream);
      
          decoder->init(*input);
          avro::decode(*decoder, outer);
      }
      

      The following will generate a proper object according to the reader schema and completely ignore the extraneous characters at the end of the stream.

      Garbage after appended field of new record
      {
          std::string readerString("{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}");
          std::string writerString("{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"third\",\"type\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}]}");
          std::stringstream readerStream(readerString);
          std::stringstream writerStream(writerString);
          
          avro::ValidSchema readerSchema;
          avro::ValidSchema writerSchema;
          
          avro::compileJsonSchema(readerStream, readerSchema);
          avro::compileJsonSchema(writerStream, writerSchema);
          
          avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, readerSchema, avro::jsonDecoder(writerSchema));
          struct Outer outer;
          
          std::stringstream jsonStream("{\"first\":{\"field\":\"here is a string field\"},\"second\":{\"field\":\"here is another string field\"},\"third\":{\"number\":3} GARBAGE_HERE}");
          std::auto_ptr<avro::InputStream> input = avro::istreamInputStream(jsonStream);
      
          decoder->init(*input);
          avro::decode(*decoder, outer);
      }
      

      Attachments

        1. AVRO-1176.patch
          6 kB
          Keh-Li Sheng

        Activity

          People

            thiru_mg Thiruvalluvan M. G.
            kehli Keh-Li Sheng
            Votes:
            2 Vote for this issue
            Watchers:
            5 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: