Description
We have encountered a number of problems using ResolvingDecoder in the C++ project, which we can trace to the following causes:
1. Incorrectly swapped reader/writer arguments passed to ResolvingGrammarGenerator::generate()
2. Using the wrong tree in ResolvingGrammarGenerator::generate() to generate the backup parsing stack
3. A decoder has no "hook" into the generated codec_traits decode methods for Specific that advances the resolved parse tree through the Symbol::sSkipStart nodes to ignore extra or unknown fields in the writer's data.
4. A resolving decoder can produce a valid decoded object even when there are garbage characters at the end of the input stream, provided those characters appear in a field that the reader schema is unaware of.
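Reader/writer schema pairs that fail to parse properly are shown below. In the first example, the writer adds a field to a record that is inside an array; in the second and third examples, the field the writer adds is itself an array of records: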
{ std::string readerString("{\"type\":\"record\",\"name\":\"Outer\",\"fields\":[{\"name\":\"outerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}}}]}"); std::string writerString("{\"type\":\"record\",\"name\":\"Outer\",\"fields\":[{\"name\":\"outerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"third\",\"type\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}]}}}]}"); std::stringstream readerStream(readerString); std::stringstream writerStream(writerString); avro::ValidSchema readerSchema; avro::ValidSchema writerSchema; avro::compileJsonSchema(readerStream, readerSchema); avro::compileJsonSchema(writerStream, writerSchema); avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, readerSchema, avro::jsonDecoder(writerSchema)); struct Outer outer; std::stringstream jsonStream("{\"outerArray\":[{\"first\":{\"field\":\"here is a string field\"},\"second\":{\"field\":\"here is another string field\"},\"third\":{\"number\":3}},{\"first\":{\"field\":\"cool\"},\"second\":{\"field\":\"beans\"},\"third\":{\"number\":4}}]}"); std::auto_ptr<avro::InputStream> input = avro::istreamInputStream(jsonStream); decoder->init(*input); avro::decode(*decoder, outer); }
{ std::string readerString("{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}}}]}"); std::string writerString("{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"innerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}}]}}}]}"); std::stringstream readerStream(readerString); std::stringstream writerStream(writerString); avro::ValidSchema readerSchema; avro::ValidSchema writerSchema; avro::compileJsonSchema(readerStream, readerSchema); avro::compileJsonSchema(writerStream, writerSchema); avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, readerSchema, avro::jsonDecoder(writerSchema)); struct Outer outer; std::stringstream jsonStream("{\"extraArray\":[{\"first\":{\"field\":\"here is a string field\"},\"second\":{\"field\":\"here is another string field\"},\"innerArray\":[{\"number\":1},{\"number\":2},{\"number\":3}]},{\"first\":{\"field\":\"second item in array\"},\"second\":{\"field\":\"inner2 field of 2\"},\"innerArray\":[{\"number\":4},{\"number\":5}]},{\"first\":{\"field\":\"third item in array\"},\"second\":{\"field\":\"inner2 field of 3\"},\"innerArray\":[{\"number\":6}]}]}"); 
std::auto_ptr<avro::InputStream> input = avro::istreamInputStream(jsonStream); decoder->init(*input); avro::decode(*decoder, outer); }
{ std::string readerString("{\"type\":\"record\",\"name\":\"CombinationExtra\",\"fields\":[{\"name\":\"outerAsField\",\"type\":{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}}}]}}]}"); std::string writerString("{\"type\":\"record\",\"name\":\"CombinationExtra\",\"fields\":[{\"name\":\"outerAsField\",\"type\":{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"innerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}}]}}}]}}]}"); std::stringstream readerStream(readerString); std::stringstream writerStream(writerString); avro::ValidSchema readerSchema; avro::ValidSchema writerSchema; avro::compileJsonSchema(readerStream, readerSchema); avro::compileJsonSchema(writerStream, writerSchema); avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, readerSchema, avro::jsonDecoder(writerSchema)); struct Outer outer; std::stringstream jsonStream("{\"outerAsField\":{\"extraArray\":[{\"first\":{\"field\":\"here is a string field\"},\"second\":{\"field\":\"here is another string field\"},\"innerArray\":[{\"number\":1},{\"number\":2},{\"number\":3}]}]}}"); std::auto_ptr<avro::InputStream> input = 
avro::istreamInputStream(jsonStream); decoder->init(*input); avro::decode(*decoder, outer); }
The following generates a proper object according to the reader schema and completely ignores the extraneous characters at the end of the stream:
{ std::string readerString("{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}"); std::string writerString("{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"third\",\"type\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}]}"); std::stringstream readerStream(readerString); std::stringstream writerStream(writerString); avro::ValidSchema readerSchema; avro::ValidSchema writerSchema; avro::compileJsonSchema(readerStream, readerSchema); avro::compileJsonSchema(writerStream, writerSchema); avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, readerSchema, avro::jsonDecoder(writerSchema)); struct Outer outer; std::stringstream jsonStream("{\"first\":{\"field\":\"here is a string field\"},\"second\":{\"field\":\"here is another string field\"},\"third\":{\"number\":3} GARBAGE_HERE}"); std::auto_ptr<avro::InputStream> input = avro::istreamInputStream(jsonStream); decoder->init(*input); avro::decode(*decoder, outer); }