Uploaded image for project: 'Pig'
  1. Pig
  2. PIG-3320

AVRO: no empty field expressed when loading with AvroStorage using reader schema with extra field that has no default

    XMLWordPrintableJSON

Details

    • Bug
    • Status: Resolved
    • Major
    • Resolution: Invalid
    • 0.11.2
    • 0.12.0, 0.11.2
    • piggybank
    • AvroStorage

    Description

      Somewhat different use case than PIG-3318:

      When loading with AvroStorage using a loader (reader) schema that, relative to the schema in the Avro file, has an extra field without a default, I expected to see an extra empty column; instead, the resulting schema is the same as in the Avro file, without the extra column.

      E.g., see the following e2e-style test, which fails because of this:

                              {
                              'num' => 2,
                              # storing using writer schema
                              # loading using reader schema with extra field that has no default
                              'notmq' => 1,
                              'pig' => q\
      a = load ':INPATH:/types/numbers.txt' using PigStorage(':') as (intnum1000: int,id: int,intnum5: int,intnum100: int,intnum: int,longnum: long,floatnum: float,doublenum: double);
      
      -- Store Avro file w. schema
      b1 = foreach a generate id, intnum5;
      c1 = filter b1 by 10 <= id and id < 20;
      describe c1;
      dump c1;
      store c1 into ':OUTPATH:.intermediate_1' USING org.apache.pig.piggybank.storage.avro.AvroStorage('
      {
         "schema" : {  
            "name" : "schema_writing",
            "type" : "record",
            "fields" : [
               {  
                  "name" : "id",
                  "type" : [
                     "null",
                     "int"
                  ]
               },
               {  
                  "name" : "intnum5",
                  "type" : [
                     "null",
                     "int"
                  ]
               }
            ]
         }
      }
      ');
      
      exec;
      
      
      -- Read back what was stored with Avro adding extra field to reader schema
      u = load ':OUTPATH:.intermediate_1' USING org.apache.pig.piggybank.storage.avro.AvroStorage('
      {
         "debug" : 5,
         "schema" : {  
            "name" : "schema_reading",
            "type" : "record",
            "fields" : [
               {  
                  "name" : "id",
                  "type" : [
                     "null",
                     "int"
                  ]
               },
               {  
                  "name" : "intnum5",
                  "type" : [
                     "null",
                     "string"
                  ]
               },
               {
                  "name" : "intnum100",
                  "type" : [
                     "null",
                     "int"
                  ]
               }
            ]
         }
      }
      ');
      describe u;
      dump u;
      store u into ':OUTPATH:';
      \,
      
                              'verify_pig_script' => q\
      a = load ':INPATH:/types/numbers.txt' using PigStorage(':') as (intnum1000: int,id: int,intnum5: int,intnum100: int,intnum: int,longnum: long,floatnum: float,doublenum: double);
      b = filter a by (10 <= id and id < 20);
      c = foreach b generate id, intnum5, '';
      store c into ':OUTPATH:';
      \,
                              },
      

      Attachments

        Activity

          People

            viraj Viraj Bhat
            esoren Egil Sorensen
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: