Uploaded image for project: 'Parquet'
  1. Parquet
  2. PARQUET-665

Parquet-mr: Protobuf 3 support

    Export: XML | Word | Printable | JSON

    Details

    • Type: Improvement
    • Status: Resolved
    • Priority: Major
    • Resolution: Fixed
    • Affects Version/s: None
    • Fix Version/s: 1.9.0
    • Component/s: parquet-mr
    • Labels:
      None
    • Flags:
      Patch

      Description

      Does parquet-mr support Protobuf version 3? I've applied the following patch, and the tests are failing, mostly because of differences in how optional and required fields are handled.

      diff --git a/parquet-protobuf/pom.xml b/parquet-protobuf/pom.xml
      index b3e4e50..aa67423 100644
      --- a/parquet-protobuf/pom.xml
      +++ b/parquet-protobuf/pom.xml
      @@ -31,7 +31,7 @@
       
         <properties>
           <elephant-bird.version>4.4</elephant-bird.version>
      -    <protobuf.version>2.5.0</protobuf.version>
      +    <protobuf.version>3.0.0-beta-4</protobuf.version>
         </properties>
       
       
      diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
      index 5c6ebca..7e2557f 100644
      --- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
      +++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
      @@ -88,7 +88,7 @@ public class ProtoInputOutputFormatTest {
       
       
           //test that only requested fields were deserialized
      -    assertTrue(readDocument.hasDocId());
      +    assertTrue(readDocument.getDocId() == 12345);
           assertTrue("Found data outside projection.", readDocument.getNameCount() == 0);
         }
       
      diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java
      index 5318bd2..1cbb972 100644
      --- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java
      +++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java
      @@ -183,16 +183,16 @@ public class ProtoRecordConverterTest {
           TestProtobuf.InnerMessage third = result.getInner(2);
       
           assertEquals("First inner", first.getOne());
      -    assertFalse(first.hasTwo());
      -    assertFalse(first.hasThree());
      +    assertEquals(first.getTwo(), "");
      +    assertEquals(first.getThree(), "");
       
           assertEquals("Second inner", second.getTwo());
      -    assertFalse(second.hasOne());
      -    assertFalse(second.hasThree());
      +    assertEquals(second.getOne(), "");
      +    assertEquals(second.getThree(), "");
       
           assertEquals("Third inner", third.getThree());
      -    assertFalse(third.hasOne());
      -    assertFalse(third.hasTwo());
      +    assertEquals(third.getOne(), "");
      +    assertEquals(third.getTwo(), "");
         }
       
       
      diff --git a/parquet-protobuf/src/test/resources/TestProtobuf.proto b/parquet-protobuf/src/test/resources/TestProtobuf.proto
      index afa0f63..caf7926 100644
      --- a/parquet-protobuf/src/test/resources/TestProtobuf.proto
      +++ b/parquet-protobuf/src/test/resources/TestProtobuf.proto
      @@ -9,7 +9,7 @@
       //
       //   http://www.apache.org/licenses/LICENSE-2.0
       //
      -// Unless required by applicable law or agreed to in writing,
      +// Unless required by applicable law or agreed to in writing,
       // software distributed under the License is distributed on an
       // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
       // KIND, either express or implied.  See the License for the
      @@ -17,6 +17,8 @@
       // under the License.
       //
       
      +syntax = "proto3";
      +
       package TestProtobuf;
       
       option java_package = "org.apache.parquet.proto.test";
      @@ -25,17 +27,18 @@ option java_package = "org.apache.parquet.proto.test";
       // messages but groups were deprecated.
       
       message Document {
      -    required int64 DocId = 1;
      -    optional Links links = 32;
      -    repeated group Name = 24 {
      +    int64 DocId = 1;
      +    Links links = 32;
      +    message Name  {
               repeated Language name = 4;
      -        optional string url = 5;
      +        string url = 5;
           }
      +    repeated Name name = 24;
       }
       
       message Language {
      -    required string code = 12;
      -    optional string Country = 14;
      +    string code = 12;
      +    string Country = 14;
       }
       
       message Links {
      @@ -47,42 +50,43 @@ message Links {
       // begin - protocol buffers for ProtoSchemaConverterTest
       
        message SchemaConverterSimpleMessage {
      -     optional int32 someId = 3;
      +     int32 someId = 3;
        }
       
        message SchemaConverterAllDatatypes {
      -     optional double optionalDouble = 1;
      -     optional float optionalFloat = 2;
      -     optional int32 optionalInt32 = 3;
      -     optional int64 optionalInt64 = 4;
      -     optional uint32 optionalUInt32 = 5;
      -     optional uint64 optionalUInt64 = 6;
      -     optional sint32 optionalSInt32 = 7;
      -     optional sint64 optionalSInt64 = 8;
      -     optional fixed32 optionalFixed32 = 9;
      -     optional fixed64 optionalFixed64 = 10;
      -     optional sfixed32 optionalSFixed32 = 11;
      -     optional sfixed64 optionalSFixed64 = 12;
      -     optional bool optionalBool = 13;
      -     optional string optionalString = 14;
      -     optional bytes optionalBytes = 15;
      -     optional SchemaConverterSimpleMessage optionalMessage = 16;
      -     optional group PbGroup  = 17 {
      -       optional int32 groupInt = 2;
      +     double optionalDouble = 1;
      +     float optionalFloat = 2;
      +     int32 optionalInt32 = 3;
      +     int64 optionalInt64 = 4;
      +     uint32 optionalUInt32 = 5;
      +     uint64 optionalUInt64 = 6;
      +     sint32 optionalSInt32 = 7;
      +     sint64 optionalSInt64 = 8;
      +     fixed32 optionalFixed32 = 9;
      +     fixed64 optionalFixed64 = 10;
      +     sfixed32 optionalSFixed32 = 11;
      +     sfixed64 optionalSFixed64 = 12;
      +     bool optionalBool = 13;
      +     string optionalString = 14;
      +     bytes optionalBytes = 15;
      +     SchemaConverterSimpleMessage optionalMessage = 16;
      +     message PbGroup {
      +       int32 groupInt = 2;
            }
      +     PbGroup pbGroup = 17;
           enum TestEnum {
               FIRST = 0;
               SECOND = 1;
           }
      -    optional TestEnum optionalEnum = 18;
      +    TestEnum optionalEnum = 18;
        }
       
        message SchemaConverterRepetition {
      -     optional int32 optionalPrimitive = 1;
      -     required int32 requiredPrimitive = 2;
      +     int32 optionalPrimitive = 1;
      +     int32 requiredPrimitive = 2;
            repeated int32 repeatedPrimitive = 3;
      -     optional SchemaConverterSimpleMessage optionalMessage = 7;
      -     required SchemaConverterSimpleMessage requiredMessage = 8;
      +     SchemaConverterSimpleMessage optionalMessage = 7;
      +     SchemaConverterSimpleMessage requiredMessage = 8;
            repeated SchemaConverterSimpleMessage repeatedMessage = 9;
        }
       
      @@ -92,22 +96,22 @@ message Links {
       //begin protocol buffers for ProtoInputOutputFormatTest
       
       message InputOutputMsgFormat {
      -    optional int32 someId = 3;
      +    int32 someId = 3;
       }
       
       message IOFormatMessage {
      -    optional double optionalDouble = 1;
      +    double optionalDouble = 1;
           repeated string repeatedString = 2;
      -    optional InputOutputMsgFormat msg = 3;
      +    InputOutputMsgFormat msg = 3;
        }
       
       //end protocol buffers for ProtoInputOutputFormatTest
       
       
       message InnerMessage {
      -    optional string one = 1;
      -    optional string two = 2;
      -    optional string three = 3;
      +    string one = 1;
      +    string two = 2;
      +    string three = 3;
       }
       
       message TopMessage {
      @@ -115,7 +119,7 @@ message TopMessage {
       }
       
       message MessageA {
      -    optional InnerMessage inner = 123;
      +    InnerMessage inner = 123;
       }
       
       message RepeatedIntMessage {
      @@ -129,11 +133,11 @@ message HighIndexMessage {
       //custom proto class - ProtoInputOutputFormatTest
       
       message FirstCustomClassMessage {
      -    optional string string = 11;
      +    string string = 11;
       }
       
       message SecondCustomClassMessage {
      -    optional string string = 11;
      +    string string = 11;
       }
       
       //please place your unit test Protocol Buffer definitions here.
      

        Attachments

          Activity

            People

            • Assignee:
              kalbasit Wael Nasreddine
              Reporter:
              kalbasit Wael Nasreddine
            • Votes:
              1 Vote for this issue
              Watchers:
              3 Start watching this issue

              Dates

              • Created:
                Updated:
                Resolved: