diff --git contrib/src/java/org/apache/hadoop/hive/contrib/serde2/s3/S3LogDeserializer.java contrib/src/java/org/apache/hadoop/hive/contrib/serde2/s3/S3LogDeserializer.java index 69b618b..ce445b0 100644 --- contrib/src/java/org/apache/hadoop/hive/contrib/serde2/s3/S3LogDeserializer.java +++ contrib/src/java/org/apache/hadoop/hive/contrib/serde2/s3/S3LogDeserializer.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.serde2.AbstractDeserializer; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeStats; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ReflectionStructObjectInspector; @@ -182,7 +183,7 @@ public static void main(String[] args) { // Text("04ff331638adc13885d6c42059584deabbdeabcd55bf0bee491172a79a87b196 static.zemanta.com [09/Apr/2009:23:12:39 +0000] 65.94.12.181 65a011a29cdf8ec533ec3d1ccaae921c EEE6FFE9B9F9EA29 REST.HEAD.OBJECT readside/loader.js%22+defer%3D%22defer \"HEAD /readside/loader.js\"+defer=\"defer HTTP/1.0\" 403 AccessDenied 231 - 7 - \"-\" \"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)\""); Text sample = new Text( "04ff331638adc13885d6c42059584deabbdeabcd55bf0bee491172a79a87b196 img.zemanta.com [10/Apr/2009:05:34:01 +0000] 70.32.81.92 65a011a29cdf8ec533ec3d1ccaae921c F939A7D698D27C63 REST.GET.OBJECT reblog_b.png \"GET /reblog_b.png?x-id=79ca9376-6326-41b7-9257-eea43d112eb2 HTTP/1.0\" 200 - 1250 1250 160 159 \"-\" \"Firefox 0.8 (Linux)\" useragent=\"Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.6) Gecko/20040614 Firefox/0.8\""); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); Object row = serDe.deserialize(sample); System.err.println(serDe.getObjectInspector().getClass().toString()); ReflectionStructObjectInspector oi = (ReflectionStructObjectInspector) serDe diff --git contrib/src/test/org/apache/hadoop/hive/contrib/serde2/TestRegexSerDe.java contrib/src/test/org/apache/hadoop/hive/contrib/serde2/TestRegexSerDe.java index 394ce3f..639fc3a 100644 --- contrib/src/test/org/apache/hadoop/hive/contrib/serde2/TestRegexSerDe.java +++ contrib/src/test/org/apache/hadoop/hive/contrib/serde2/TestRegexSerDe.java @@ -24,6 +24,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; @@ -44,7 +45,7 @@ private SerDe createSerDe(String fieldNames, String fieldTypes, schema.setProperty("output.format.string", outputFormatString); RegexSerDe serde = new RegexSerDe(); - serde.initialize(new Configuration(), schema); + SerDeUtils.initializeSerDe(serde, new Configuration(), schema, null); return serde; } diff --git hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java index 089a31a..e61e278 100644 --- hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java +++ hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java @@ -115,28 +115,28 @@ public void testHBaseSerDeI() throws SerDeException { HBaseSerDe serDe = 
new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesI_I(); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); deserializeAndSerialize(serDe, r, p, expectedFieldsData); serDe = new HBaseSerDe(); conf = new Configuration(); tbl = createPropertiesI_II(); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); deserializeAndSerialize(serDe, r, p, expectedFieldsData); serDe = new HBaseSerDe(); conf = new Configuration(); tbl = createPropertiesI_III(); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); deserializeAndSerialize(serDe, r, p, expectedFieldsData); serDe = new HBaseSerDe(); conf = new Configuration(); tbl = createPropertiesI_IV(); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); deserializeAndSerialize(serDe, r, p, expectedFieldsData); } @@ -149,7 +149,7 @@ public void testHBaseSerDeWithTimestamp() throws SerDeException { long putTimestamp = 1; tbl.setProperty(HBaseSerDe.HBASE_PUT_TIMESTAMP, Long.toString(putTimestamp)); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); byte [] cfa = "cola".getBytes(); @@ -356,21 +356,21 @@ public void testHBaseSerDeII() throws SerDeException { HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesII_I(); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); deserializeAndSerialize(serDe, r, p, expectedFieldsData); serDe = new HBaseSerDe(); conf = new Configuration(); tbl = createPropertiesII_II(); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); deserializeAndSerialize(serDe, r, p, expectedFieldsData); serDe = new HBaseSerDe(); conf = new Configuration(); tbl = createPropertiesII_III(); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); deserializeAndSerialize(serDe, r, p, expectedFieldsData); } @@ -486,7 +486,7 @@ public void testHBaseSerDeWithHiveMapToHBaseColumnFamily() throws SerDeException HBaseSerDe hbaseSerDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForHiveMapHBaseColumnFamily(); - hbaseSerDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(hbaseSerDe, conf, tbl, null); deserializeAndSerializeHiveMapHBaseColumnFamily(hbaseSerDe, r, p, expectedData, rowKeys, columnFamilies, columnQualifiersAndValues); @@ -494,7 +494,7 @@ public void testHBaseSerDeWithHiveMapToHBaseColumnFamily() throws SerDeException hbaseSerDe = new HBaseSerDe(); conf = new Configuration(); tbl = createPropertiesForHiveMapHBaseColumnFamilyII(); - hbaseSerDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(hbaseSerDe, conf, tbl, null); deserializeAndSerializeHiveMapHBaseColumnFamily(hbaseSerDe, r, p, expectedData, rowKeys, columnFamilies, columnQualifiersAndValues); @@ -615,7 +615,7 @@ public void testHBaseSerDeWithHiveMapToHBaseColumnFamilyII() throws SerDeExcepti HBaseSerDe hbaseSerDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForHiveMapHBaseColumnFamilyII_I(); - hbaseSerDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(hbaseSerDe, conf, tbl, null); deserializeAndSerializeHiveMapHBaseColumnFamilyII(hbaseSerDe, r, p, expectedData, columnFamilies, columnQualifiersAndValues); @@ -623,7 +623,7 @@ public void testHBaseSerDeWithHiveMapToHBaseColumnFamilyII() throws SerDeExcepti hbaseSerDe = new 
HBaseSerDe(); conf = new Configuration(); tbl = createPropertiesForHiveMapHBaseColumnFamilyII_II(); - hbaseSerDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(hbaseSerDe, conf, tbl, null); deserializeAndSerializeHiveMapHBaseColumnFamilyII(hbaseSerDe, r, p, expectedData, columnFamilies, columnQualifiersAndValues); @@ -748,7 +748,7 @@ public void testHBaseSerDeWithColumnPrefixes() HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForColumnPrefixes(); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); Object notPresentKey = new Text("unwanted_col"); @@ -842,7 +842,7 @@ public void testHBaseSerDeCompositeKeyWithSeparator() throws SerDeException, TEx HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForCompositeKeyWithSeparator(); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); deserializeAndSerializeHBaseCompositeKey(serDe, r, p); } @@ -892,7 +892,7 @@ public void testHBaseSerDeCompositeKeyWithoutSeparator() throws SerDeException, HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForCompositeKeyWithoutSeparator(); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); deserializeAndSerializeHBaseCompositeKey(serDe, r, p); } diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InternalUtil.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InternalUtil.java index fb650dd..9b97939 100644 --- hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InternalUtil.java +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InternalUtil.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -142,14 +143,17 @@ private static ObjectInspector getObjectInspector(TypeInfo type) throws IOExcept // if the default was decided by the serde static void initializeOutputSerDe(SerDe serDe, Configuration conf, OutputJobInfo jobInfo) throws SerDeException { - serDe.initialize(conf, getSerdeProperties(jobInfo.getTableInfo(), jobInfo.getOutputSchema())); + SerDeUtils.initializeSerDe(serDe, conf, + getSerdeProperties(jobInfo.getTableInfo(), + jobInfo.getOutputSchema()), + null); } static void initializeDeserializer(Deserializer deserializer, Configuration conf, HCatTableInfo info, HCatSchema schema) throws SerDeException { Properties props = getSerdeProperties(info, schema); LOG.info("Initializing " + deserializer.getClass().getName() + " with properties " + props); - deserializer.initialize(conf, props); + SerDeUtils.initializeSerDe(deserializer, conf, props, null); } private static Properties getSerdeProperties(HCatTableInfo info, HCatSchema s) diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestHCatRecordSerDe.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestHCatRecordSerDe.java index e84b789..7e794a4 100644 --- hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestHCatRecordSerDe.java +++ 
hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestHCatRecordSerDe.java @@ -30,6 +30,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.io.Writable; import org.slf4j.Logger; @@ -123,7 +124,7 @@ public void testRW() throws Exception { HCatRecord r = e.getValue(); HCatRecordSerDe hrsd = new HCatRecordSerDe(); - hrsd.initialize(conf, tblProps); + SerDeUtils.initializeSerDe(hrsd, conf, tblProps, null); LOG.info("ORIG: {}", r); @@ -144,7 +145,7 @@ public void testRW() throws Exception { // serialize using another serde, and read out that object repr. LazySimpleSerDe testSD = new LazySimpleSerDe(); - testSD.initialize(conf, tblProps); + SerDeUtils.initializeSerDe(testSD, conf, tblProps, null); Writable s3 = testSD.serialize(s, hrsd.getObjectInspector()); LOG.info("THREE: {}", s3); diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestJsonSerDe.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestJsonSerDe.java index c1d170a..da5ae97 100644 --- hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestJsonSerDe.java +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestJsonSerDe.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.slf4j.Logger; @@ -143,10 +144,10 @@ public void testRW() throws Exception { HCatRecord r = e.second; HCatRecordSerDe hrsd = new HCatRecordSerDe(); - hrsd.initialize(conf, tblProps); + SerDeUtils.initializeSerDe(hrsd, conf, tblProps, null); JsonSerDe jsde = new JsonSerDe(); - jsde.initialize(conf, tblProps); + SerDeUtils.initializeSerDe(jsde, conf, tblProps, null); LOG.info("ORIG:{}", r); @@ -195,10 +196,10 @@ public void testRobustRead() throws Exception { LOG.info("modif tbl props:{}", internalTblProps); JsonSerDe wjsd = new JsonSerDe(); - wjsd.initialize(conf, internalTblProps); + SerDeUtils.initializeSerDe(wjsd, conf, internalTblProps, null); JsonSerDe rjsd = new JsonSerDe(); - rjsd.initialize(conf, tblProps); + SerDeUtils.initializeSerDe(rjsd, conf, tblProps, null); LOG.info("ORIG:{}", r); @@ -266,7 +267,7 @@ public void testLooseJsonReadability() throws Exception { props.put(serdeConstants.LIST_COLUMNS, "s,k"); props.put(serdeConstants.LIST_COLUMN_TYPES, "struct,int"); JsonSerDe rjsd = new JsonSerDe(); - rjsd.initialize(conf, props); + SerDeUtils.initializeSerDe(rjsd, conf, props, null); Text jsonText = new Text("{ \"x\" : \"abc\" , " + " \"t\" : { \"a\":\"1\", \"b\":\"2\", \"c\":[ { \"x\":2 , \"y\":3 } , { \"x\":3 , \"y\":2 }] } ," diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java index 9dde771..9a89980 100644 --- hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.ql.io.RCFile; import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeUtils; 
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; @@ -75,7 +76,7 @@ serDe = new ColumnarSerDe(); // Create the SerDe tbl = createProperties(); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); } catch (Exception e) { } } diff --git hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/DelimitedInputWriter.java hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/DelimitedInputWriter.java index 7ba6bb8..36834b1 100644 --- hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/DelimitedInputWriter.java +++ hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/DelimitedInputWriter.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.io.BytesWritable; @@ -248,7 +249,7 @@ protected LazySimpleSerDe createSerde(Table tbl, HiveConf conf) Properties tableProps = MetaStoreUtils.getTableMetadata(tbl); tableProps.setProperty("field.delim", String.valueOf(serdeSeparator)); LazySimpleSerDe serde = new LazySimpleSerDe(); - serde.initialize(conf, tableProps); + SerDeUtils.initializeSerDe(serde, conf, tableProps, null); return serde; } catch (SerDeException e) { throw new SerializationError("Error initializing serde", e); diff --git hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictJsonWriter.java hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictJsonWriter.java index 9b26550..6d6beb8 100644 --- hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictJsonWriter.java +++ hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictJsonWriter.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.io.Text; import org.apache.hive.hcatalog.data.JsonSerDe; @@ -95,7 +96,7 @@ private static JsonSerDe createSerde(Table tbl, HiveConf conf) try { Properties tableProps = MetaStoreUtils.getTableMetadata(tbl); JsonSerDe serde = new JsonSerDe(); - serde.initialize(conf, tableProps); + SerDeUtils.initializeSerDe(serde, conf, tableProps, null); return serde; } catch (SerDeException e) { throw new SerializationError("Error initializing serde " + JsonSerDe.class.getName(), e); diff --git jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveQueryResultSet.java jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveQueryResultSet.java index 3215178..e4c2591 100644 --- jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveQueryResultSet.java +++ jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveQueryResultSet.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.metastore.api.Schema; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; @@ -108,7 +109,7 @@ private void initSerde() throws SQLException { LOG.debug("Column types: " + types); 
props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types); } - serde.initialize(new Configuration(), props); + SerDeUtils.initializeSerDe(serde, new Configuration(), props, null); } catch (Exception ex) { ex.printStackTrace(); diff --git metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java index 1bbe02e..2fad510 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java @@ -60,6 +60,7 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; @@ -336,7 +337,7 @@ static public Deserializer getDeserializer(Configuration conf, try { Deserializer deserializer = ReflectionUtils.newInstance(conf.getClassByName(lib). asSubclass(Deserializer.class), conf); - deserializer.initialize(conf, MetaStoreUtils.getTableMetadata(table)); + SerDeUtils.initializeSerDe(deserializer, conf, MetaStoreUtils.getTableMetadata(table), null); return deserializer; } catch (RuntimeException e) { throw e; @@ -372,7 +373,8 @@ static public Deserializer getDeserializer(Configuration conf, try { Deserializer deserializer = ReflectionUtils.newInstance(conf.getClassByName(lib). asSubclass(Deserializer.class), conf); - deserializer.initialize(conf, MetaStoreUtils.getPartitionMetadata(part, table)); + SerDeUtils.initializeSerDe(deserializer, conf, MetaStoreUtils.getTableMetadata(table), + MetaStoreUtils.getPartitionMetadata(part, table)); return deserializer; } catch (RuntimeException e) { throw e; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DefaultFetchFormatter.java ql/src/java/org/apache/hadoop/hive/ql/exec/DefaultFetchFormatter.java index 25385ba..5924bcf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DefaultFetchFormatter.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DefaultFetchFormatter.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.DelimitedJSONSerDe; import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.util.ReflectionUtils; @@ -61,7 +62,7 @@ private SerDe initializeSerde(Configuration conf, Properties props) throws Excep serdeProps.put(SERIALIZATION_FORMAT, props.getProperty(SERIALIZATION_FORMAT)); serdeProps.put(SERIALIZATION_NULL_FORMAT, props.getProperty(SERIALIZATION_NULL_FORMAT)); } - serde.initialize(conf, serdeProps); + SerDeUtils.initializeSerDe(serde, conf, serdeProps, null); return serde; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java index b0b0925..772dda6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.SerDeUtils; import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.util.ReflectionUtils; @@ -131,12 +132,13 @@ protected void initializeOp(Configuration hconf) throws HiveException { TableDesc keyTableDesc = conf.getKeysSerializeInfos().get(newTag); Deserializer inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc .getDeserializerClass(), null); - inputKeyDeserializer.initialize(null, keyTableDesc.getProperties()); + SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null); TableDesc valueTableDesc = conf.getValuesSerializeInfos().get(newTag); Deserializer inputValueDeserializer = ReflectionUtils.newInstance(valueTableDesc .getDeserializerClass(), null); - inputValueDeserializer.initialize(null, valueTableDesc.getProperties()); + SerDeUtils.initializeSerDe(inputValueDeserializer, null, valueTableDesc.getProperties(), + null); List oi = new ArrayList(); oi.add(inputKeyDeserializer.getObjectInspector()); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java index 6daf199..0da886b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java @@ -51,6 +51,7 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.DelegatedObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -240,7 +241,7 @@ public boolean isEmptyTable() { private StructObjectInspector getRowInspectorFromTable(TableDesc table) throws Exception { Deserializer serde = table.getDeserializerClass().newInstance(); - serde.initialize(job, table.getProperties()); + SerDeUtils.initializeSerDe(serde, job, table.getProperties(), null); return createRowInspector(getStructOIFrom(serde.getObjectInspector())); } @@ -261,7 +262,7 @@ private StructObjectInspector getRowInspectorFromPartition(PartitionDesc partiti private StructObjectInspector getRowInspectorFromPartitionedTable(TableDesc table) throws Exception { Deserializer serde = table.getDeserializerClass().newInstance(); - serde.initialize(job, table.getProperties()); + SerDeUtils.initializeSerDe(serde, job, table.getProperties(), null); String pcols = table.getProperties().getProperty( org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS); String[] partKeys = pcols.trim().split("/"); @@ -427,14 +428,15 @@ private void getNextPath() throws Exception { splitNum = 0; serde = partDesc.getDeserializer(job); - serde.initialize(job, partDesc.getOverlayedProperties()); + SerDeUtils.initializeSerDe(serde, job, partDesc.getTableDesc().getProperties(), + partDesc.getProperties()); if (currTbl != null) { tblSerde = serde; } else { tblSerde = currPart.getTableDesc().getDeserializerClass().newInstance(); - tblSerde.initialize(job, currPart.getTableDesc().getProperties()); + SerDeUtils.initializeSerDe(tblSerde, job, currPart.getTableDesc().getProperties(), null); } ObjectInspector outputOI = ObjectInspectorConverters.getConvertedOI( @@ -448,7 +450,9 @@ private void getNextPath() throws Exception { if (LOG.isDebugEnabled()) { LOG.debug("Creating fetchTask 
with deserializer typeinfo: " + serde.getObjectInspector().getTypeName()); - LOG.debug("deserializer properties: " + partDesc.getOverlayedProperties()); + LOG.debug("deserializer properties:\ntable properties: " + + partDesc.getTableDesc().getProperties() + "\npartition properties: " + + partDesc.getProperties()); } if (currPart != null) { @@ -704,7 +708,7 @@ public ObjectInspector getOutputObjectInspector() throws HiveException { // Whenever a new partition is being read, a new converter is being created PartitionDesc partition = listParts.get(0); Deserializer tblSerde = partition.getTableDesc().getDeserializerClass().newInstance(); - tblSerde.initialize(job, partition.getTableDesc().getProperties()); + SerDeUtils.initializeSerDe(tblSerde, job, partition.getTableDesc().getProperties(), null); partitionedTableOI = null; ObjectInspector tableOI = tblSerde.getObjectInspector(); @@ -713,7 +717,8 @@ public ObjectInspector getOutputObjectInspector() throws HiveException { for (PartitionDesc listPart : listParts) { partition = listPart; Deserializer partSerde = listPart.getDeserializer(job); - partSerde.initialize(job, listPart.getOverlayedProperties()); + SerDeUtils.initializeSerDe(partSerde, job, partition.getTableDesc().getProperties(), + listPart.getProperties()); partitionedTableOI = ObjectInspectorConverters.getConvertedOI( partSerde.getObjectInspector(), tableOI, oiSettableProperties); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableDummyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableDummyOperator.java index e00b7d3..91b2369 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableDummyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableDummyOperator.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.SerDeUtils; public class HashTableDummyOperator extends Operator implements Serializable { private static final long serialVersionUID = 1L; @@ -34,7 +35,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { TableDesc tbl = this.getConf().getTbl(); try { Deserializer serde = tbl.getDeserializerClass().newInstance(); - serde.initialize(hconf, tbl.getProperties()); + SerDeUtils.initializeSerDe(serde, hconf, tbl.getProperties(), null); this.outputObjInspector = serde.getObjectInspector(); initializeChildren(hconf); } catch (Exception e) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java index c8003f5..f5d4670 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java @@ -47,6 +47,7 @@ import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; @@ -177,7 +178,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { TableDesc keyTableDesc = conf.getKeyTblDesc(); SerDe keySerde = (SerDe) ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), 
null); - keySerde.initialize(null, keyTableDesc.getProperties()); + SerDeUtils.initializeSerDe(keySerde, null, keyTableDesc.getProperties(), null); MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerde, false); for (Byte pos : order) { if (pos == posBigTableAlias) { @@ -186,7 +187,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { mapJoinTables[pos] = new HashMapWrapper(hashTableThreshold, hashTableLoadFactor); TableDesc valueTableDesc = conf.getValueTblFilteredDescs().get(pos); SerDe valueSerDe = (SerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null); - valueSerDe.initialize(null, valueTableDesc.getProperties()); + SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null); mapJoinTableSerdes[pos] = new MapJoinTableContainerSerDe(keyContext, new MapJoinObjectSerDeContext( valueSerDe, hasFilter(pos))); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java index 80ccf5a..4013b7f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -261,7 +262,7 @@ public static SerDe getSpillSerDe(byte alias, TableDesc[] spillTableDesc, SerDe sd = (SerDe) ReflectionUtils.newInstance(desc.getDeserializerClass(), null); try { - sd.initialize(null, desc.getProperties()); + SerDeUtils.initializeSerDe(sd, null, desc.getProperties(), null); } catch (SerDeException e) { e.printStackTrace(); return null; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java index 055d13e..b93ea7a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.util.ReflectionUtils; /** @@ -117,7 +118,7 @@ public void generateMapMetaData() throws HiveException, SerDeException { TableDesc keyTableDesc = conf.getKeyTblDesc(); SerDe keySerializer = (SerDe) ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null); - keySerializer.initialize(null, keyTableDesc.getProperties()); + SerDeUtils.initializeSerDe(keySerializer, null, keyTableDesc.getProperties(), null); MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerializer, false); for (int pos = 0; pos < order.length; pos++) { if (pos == posBigTable) { @@ -131,7 +132,7 @@ public void generateMapMetaData() throws HiveException, SerDeException { } SerDe valueSerDe = (SerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null); - valueSerDe.initialize(null, valueTableDesc.getProperties()); + SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null); MapJoinObjectSerDeContext valueContext = new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos)); 
mapJoinTableSerdes[pos] = new MapJoinTableContainerSerDe(keyContext, valueContext); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java index 2416948..fc5864a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java @@ -184,18 +184,18 @@ private MapOpCtx initObjectInspector(Configuration hconf, MapInputPath ctx, MapOpCtx opCtx = new MapOpCtx(); // Use table properties in case of unpartitioned tables, // and the union of table properties and partition properties, with partition - // taking precedence - Properties partProps = isPartitioned(pd) ? - pd.getOverlayedProperties() : pd.getTableDesc().getProperties(); + // taking precedence, in the case of partitioned tables + Properties overlayedProps = + SerDeUtils.createOverlayedProperties(td.getProperties(), pd.getProperties()); Map partSpec = pd.getPartSpec(); - opCtx.tableName = String.valueOf(partProps.getProperty("name")); + opCtx.tableName = String.valueOf(overlayedProps.getProperty("name")); opCtx.partName = String.valueOf(partSpec); Class serdeclass = hconf.getClassByName(pd.getSerdeClassName()); opCtx.deserializer = (Deserializer) serdeclass.newInstance(); - opCtx.deserializer.initialize(hconf, partProps); + SerDeUtils.initializeSerDe(opCtx.deserializer, hconf, td.getProperties(), pd.getProperties()); StructObjectInspector partRawRowObjectInspector = (StructObjectInspector) opCtx.deserializer.getObjectInspector(); @@ -208,11 +208,12 @@ private MapOpCtx initObjectInspector(Configuration hconf, MapInputPath ctx, // Next check if this table has partitions and if so // get the list of partition names as well as allocate // the serdes for the partition columns - String pcols = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS); + String pcols = overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS); if (pcols != null && pcols.length() > 0) { String[] partKeys = pcols.trim().split("/"); - String pcolTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES); + String pcolTypes = overlayedProps + .getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES); String[] partKeyTypes = pcolTypes.trim().split(":"); if (partKeys.length > partKeyTypes.length) { @@ -300,11 +301,9 @@ private MapOpCtx initObjectInspector(Configuration hconf, MapInputPath ctx, PartitionDesc pd = conf.getPathToPartitionInfo().get(onefile); TableDesc tableDesc = pd.getTableDesc(); Properties tblProps = tableDesc.getProperties(); - // If the partition does not exist, use table properties - Properties partProps = isPartitioned(pd) ? 
pd.getOverlayedProperties() : tblProps; Class sdclass = hconf.getClassByName(pd.getSerdeClassName()); Deserializer partDeserializer = (Deserializer) sdclass.newInstance(); - partDeserializer.initialize(hconf, partProps); + SerDeUtils.initializeSerDe(partDeserializer, hconf, tblProps, pd.getProperties()); StructObjectInspector partRawRowObjectInspector = (StructObjectInspector) partDeserializer .getObjectInspector(); @@ -313,7 +312,7 @@ private MapOpCtx initObjectInspector(Configuration hconf, MapInputPath ctx, (identityConverterTableDesc.contains(tableDesc))) { sdclass = hconf.getClassByName(tableDesc.getSerdeClassName()); Deserializer tblDeserializer = (Deserializer) sdclass.newInstance(); - tblDeserializer.initialize(hconf, tblProps); + SerDeUtils.initializeSerDe(tblDeserializer, hconf, tblProps, null); tblRawRowObjectInspector = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI( partRawRowObjectInspector, diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java index 1354b36..5b857e2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.Serializer; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.BytesWritable; @@ -242,8 +243,8 @@ protected void initializeOp(Configuration hconf) throws HiveException { scriptOutputDeserializer = conf.getScriptOutputInfo() .getDeserializerClass().newInstance(); - scriptOutputDeserializer.initialize(hconf, conf.getScriptOutputInfo() - .getProperties()); + SerDeUtils.initializeSerDe(scriptOutputDeserializer, hconf, + conf.getScriptOutputInfo().getProperties(), null); scriptInputSerializer = (Serializer) conf.getScriptInputInfo() .getDeserializerClass().newInstance(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/SkewJoinHandler.java ql/src/java/org/apache/hadoop/hive/ql/exec/SkewJoinHandler.java index 3bf58f6..6e531dd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/SkewJoinHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SkewJoinHandler.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructField; @@ -138,7 +139,7 @@ public void initiliaze(Configuration hconf) { try { SerDe serializer = (SerDe) ReflectionUtils.newInstance(tblDesc.get( alias).getDeserializerClass(), null); - serializer.initialize(null, tblDesc.get(alias).getProperties()); + SerDeUtils.initializeSerDe(serializer, null, tblDesc.get(alias).getProperties(), null); tblSerializers.put((byte) i, serializer); } catch (SerDeException e) { LOG.error("Skewjoin will be disabled due to " + e.getMessage(), e); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index c52a093..8ee4b98 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -160,6 +160,7 @@ import org.apache.hadoop.hive.ql.stats.StatsPublisher; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.Serializer; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.shims.ShimLoader; @@ -2231,8 +2232,10 @@ public void run() { return; } HiveStorageHandler handler = HiveUtils.getStorageHandler(myConf, - partDesc.getOverlayedProperties().getProperty( - hive_metastoreConstants.META_TABLE_STORAGE)); + SerDeUtils.createOverlayedProperties( + partDesc.getTableDesc().getProperties(), + partDesc.getProperties()) + .getProperty(hive_metastoreConstants.META_TABLE_STORAGE)); if (handler instanceof InputEstimator) { long total = 0; TableDesc tableDesc = partDesc.getTableDesc(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java index 2ef79d4..58d1638 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java @@ -134,7 +134,7 @@ public void configure(JobConf job) { keyTableDesc = gWork.getKeyDesc(); inputKeyDeserializer = (SerDe) ReflectionUtils.newInstance(keyTableDesc .getDeserializerClass(), null); - inputKeyDeserializer.initialize(null, keyTableDesc.getProperties()); + SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null); keyObjectInspector = inputKeyDeserializer.getObjectInspector(); valueTableDesc = new TableDesc[gWork.getTagToValueDesc().size()]; for (int tag = 0; tag < gWork.getTagToValueDesc().size(); tag++) { @@ -142,8 +142,8 @@ public void configure(JobConf job) { valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag); inputValueDeserializer[tag] = (SerDe) ReflectionUtils.newInstance( valueTableDesc[tag].getDeserializerClass(), null); - inputValueDeserializer[tag].initialize(null, valueTableDesc[tag] - .getProperties()); + SerDeUtils.initializeSerDe(inputValueDeserializer[tag], null, + valueTableDesc[tag].getProperties(), null); valueObjectInspector[tag] = inputValueDeserializer[tag] .getObjectInspector(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java index 0e4bdff..b5607a4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java @@ -122,7 +122,7 @@ void init(JobConf jconf, TezProcessorContext processorContext, MRTaskReporter mr keyTableDesc = redWork.getKeyDesc(); inputKeyDeserializer = (SerDe) ReflectionUtils.newInstance(keyTableDesc .getDeserializerClass(), null); - inputKeyDeserializer.initialize(null, keyTableDesc.getProperties()); + SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null); keyObjectInspector = inputKeyDeserializer.getObjectInspector(); reducer.setGroupKeyObjectInspector(keyObjectInspector); valueTableDesc = new TableDesc[redWork.getTagToValueDesc().size()]; @@ -131,8 +131,8 @@ void init(JobConf jconf, TezProcessorContext processorContext, MRTaskReporter mr valueTableDesc[tag] = redWork.getTagToValueDesc().get(tag); inputValueDeserializer[tag] = (SerDe) ReflectionUtils.newInstance( valueTableDesc[tag].getDeserializerClass(), null); - 
inputValueDeserializer[tag].initialize(null, valueTableDesc[tag] - .getProperties()); + SerDeUtils.initializeSerDe(inputValueDeserializer[tag], null, + valueTableDesc[tag].getProperties(), null); valueObjectInspector[tag] = inputValueDeserializer[tag] .getObjectInspector(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 49b8da1..152b817 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -43,6 +43,7 @@ import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -176,7 +177,8 @@ public void init(Configuration hiveConf, FileSplit split) throws ClassNotFoundEx Class serdeclass = hiveConf.getClassByName(part.getSerdeClassName()); Deserializer partDeserializer = (Deserializer) serdeclass.newInstance(); - partDeserializer.initialize(hiveConf, partProps); + SerDeUtils.initializeSerDe(partDeserializer, hiveConf, part.getTableDesc().getProperties(), + partProps); StructObjectInspector partRawRowObjectInspector = (StructObjectInspector) partDeserializer .getObjectInspector(); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java index f339651..ac052cd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java @@ -85,6 +85,7 @@ import org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.WindowingTableFunctionResolver; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -814,7 +815,7 @@ protected static SerDe createLazyBinarySerDe(Configuration cfg, p.setProperty( org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES, serdePropsMap.get(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES)); - serDe.initialize(cfg, p); + SerDeUtils.initializeSerDe(serDe, cfg, p, null); return serDe; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 77305ff..0e0395e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -178,6 +178,7 @@ import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; import org.apache.hadoop.hive.serde2.NullStructSerDe; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -5950,7 +5951,7 @@ Operator genConversionSelectOperator(String dest, 
QB qb, Operator input, try { Deserializer deserializer = table_desc.getDeserializerClass() .newInstance(); - deserializer.initialize(conf, table_desc.getProperties()); + SerDeUtils.initializeSerDe(deserializer, conf, table_desc.getProperties(), null); oi = (StructObjectInspector) deserializer.getObjectInspector(); } catch (Exception e) { throw new SemanticException(e); @@ -6229,7 +6230,7 @@ private Operator genLimitMapRedPlan(String dest, QB qb, Operator input, try { Deserializer deserializer = table_desc.getDeserializerClass() .newInstance(); - deserializer.initialize(conf, table_desc.getProperties()); + SerDeUtils.initializeSerDe(deserializer, conf, table_desc.getProperties(), null); oi = (StructObjectInspector) deserializer.getObjectInspector(); } catch (Exception e) { throw new SemanticException(e); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDeserializer.java ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDeserializer.java index 3a258e4..154f29a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDeserializer.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDeserializer.java @@ -48,6 +48,7 @@ import org.apache.hadoop.hive.ql.udf.ptf.TableFunctionResolver; import org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.WindowingTableFunctionResolver; import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -260,7 +261,7 @@ protected void initialize(ShapeDetails shp, StructObjectInspector OI) throws Hiv try { SerDe serDe = ReflectionUtils.newInstance(hConf.getClassByName(serdeClassName). 
asSubclass(SerDe.class), hConf); - serDe.initialize(hConf, serDeProps); + SerDeUtils.initializeSerDe(serDe, hConf, serDeProps, null); shp.setSerde(serDe); StructObjectInspector outOI = PTFPartition.setupPartitionOutputOI(serDe, OI); shp.setOI(outOI); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java index 43cef5c..08a957a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.util.ReflectionUtils; @@ -129,7 +130,7 @@ public Deserializer getDeserializer(Configuration conf) throws Exception { } Deserializer deserializer = ReflectionUtils.newInstance(conf.getClassByName(clazzName) .asSubclass(Deserializer.class), conf); - deserializer.initialize(conf, schema); + SerDeUtils.initializeSerDe(deserializer, conf, getTableDesc().getProperties(), schema); return deserializer; } @@ -168,16 +169,6 @@ public Properties getProperties() { return properties; } - public Properties getOverlayedProperties(){ - if (tableDesc != null) { - Properties overlayedProps = new Properties(tableDesc.getProperties()); - overlayedProps.putAll(getProperties()); - return overlayedProps; - } else { - return getProperties(); - } - } - public void setProperties(final Properties properties) { this.properties = properties; for (Enumeration keys = properties.propertyNames(); keys.hasMoreElements();) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java index 6144303..39f1793 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.mapred.InputFormat; /** @@ -79,7 +80,7 @@ public TableDesc( */ public Deserializer getDeserializer() throws Exception { Deserializer de = getDeserializerClass().newInstance(); - de.initialize(null, properties); + SerDeUtils.initializeSerDe(de, null, properties, null); return de; } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestMapJoinTableContainer.java ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestMapJoinTableContainer.java index 755d783..093da55 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestMapJoinTableContainer.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestMapJoinTableContainer.java @@ -26,6 +26,7 @@ import junit.framework.Assert; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import org.apache.hadoop.io.Text; import org.junit.Before; @@ -54,12 +55,12 @@ public void setup() throws Exception { Properties keyProps = new Properties(); keyProps.put(serdeConstants.LIST_COLUMNS, "v1"); keyProps.put(serdeConstants.LIST_COLUMN_TYPES, "string"); - keySerde.initialize(null, keyProps); + SerDeUtils.initializeSerDe(keySerde, null, keyProps, null); 
LazyBinarySerDe valueSerde = new LazyBinarySerDe(); Properties valueProps = new Properties(); valueProps.put(serdeConstants.LIST_COLUMNS, "v1"); valueProps.put(serdeConstants.LIST_COLUMN_TYPES, "string"); - valueSerde.initialize(null, keyProps); + SerDeUtils.initializeSerDe(valueSerde, null, keyProps, null); containerSerde = new MapJoinTableContainerSerDe( new MapJoinObjectSerDeContext(keySerde, false), new MapJoinObjectSerDeContext(valueSerde, false)); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestPTFRowContainer.java ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestPTFRowContainer.java index cea3529..a404ff0 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestPTFRowContainer.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestPTFRowContainer.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; @@ -55,7 +56,7 @@ public static void setupClass() throws SerDeException { p.setProperty( org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES, COL_TYPES); - serDe.initialize(cfg, p); + SerDeUtils.initializeSerDe(serDe, cfg, p, null); } private PTFRowContainer> rowContainer(int blockSize) diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/Utilities.java ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/Utilities.java index 4fc613e..3a03048 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/Utilities.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/Utilities.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import org.apache.hadoop.io.BytesWritable; @@ -51,7 +52,7 @@ static MapJoinKeyObject serde(MapJoinKeyObject key, String columns, String types Properties props = new Properties(); props.put(serdeConstants.LIST_COLUMNS, columns); props.put(serdeConstants.LIST_COLUMN_TYPES, types); - serde.initialize(null, props); + SerDeUtils.initializeSerDe(serde, null, props, null); MapJoinObjectSerDeContext context = new MapJoinObjectSerDeContext(serde, false); key.write(context, out); out.close(); @@ -84,7 +85,7 @@ static MapJoinEagerRowContainer serde( Properties props = new Properties(); props.put(serdeConstants.LIST_COLUMNS, columns); props.put(serdeConstants.LIST_COLUMN_TYPES, types); - serde.initialize(null, props); + SerDeUtils.initializeSerDe(serde, null, props, null); MapJoinObjectSerDeContext context = new MapJoinObjectSerDeContext(serde, true); container.write(context, out); out.close(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java index 7f3cb15..dd54aed 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde.serdeConstants; import 
org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; @@ -96,7 +97,7 @@ private void initSerde() { try { serDe = new ColumnarSerDe(); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); } catch (SerDeException e) { new RuntimeException(e); } @@ -334,7 +335,7 @@ public void TestCtx() throws Exception { // Test VectorizedColumnarSerDe VectorizedColumnarSerDe vcs = new VectorizedColumnarSerDe(); - vcs.initialize(this.conf, tbl); + SerDeUtils.initializeSerDe(vcs, this.conf, tbl, null); Writable w = vcs.serializeVector(batch, (StructObjectInspector) serDe .getObjectInspector()); BytesRefArrayWritable[] refArray = (BytesRefArrayWritable[]) ((ObjectWritable) w).get(); diff --git ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java index 5edd265..464bd5e 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java @@ -48,6 +48,7 @@ import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; @@ -113,7 +114,7 @@ public void setup() throws Exception { serDe = new ColumnarSerDe(); // Create the SerDe tbl = createProperties(); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); try { bytesArray = new byte[][] {"123".getBytes("UTF-8"), "456".getBytes("UTF-8"), "789".getBytes("UTF-8"), @@ -427,7 +428,7 @@ public static void main(String[] args) throws Exception { AbstractSerDe serDe = new ColumnarSerDe(); // Create the SerDe Properties tbl = createProperties(); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); String usage = "Usage: RCFile " + "[-count N]" + " file"; if (args.length == 0) { diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index 5664f3f..c16021e 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -75,6 +75,7 @@ import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructField; @@ -934,7 +935,7 @@ public void testInOutFormat() throws Exception { serde = new OrcSerde(); properties.setProperty("columns", "x,y"); properties.setProperty("columns.types", "int:int"); - serde.initialize(conf, properties); + SerDeUtils.initializeSerDe(serde, conf, properties, null); assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass()); inspector = (StructObjectInspector) serde.getObjectInspector(); assertEquals("struct", inspector.getTypeName()); 
@@ -1051,7 +1052,7 @@ public void testMROutput() throws Exception { serde = new OrcSerde(); properties.setProperty("columns", "z,r"); properties.setProperty("columns.types", "int:struct"); - serde.initialize(conf, properties); + SerDeUtils.initializeSerDe(serde, conf, properties, null); inspector = (StructObjectInspector) serde.getObjectInspector(); InputFormat in = new OrcInputFormat(); FileInputFormat.setInputPaths(conf, testFilePath.toString()); @@ -1095,7 +1096,7 @@ public void testEmptyFile() throws Exception { properties.setProperty("columns", "x,y"); properties.setProperty("columns.types", "int:int"); SerDe serde = new OrcSerde(); - serde.initialize(conf, properties); + SerDeUtils.initializeSerDe(serde, conf, properties, null); InputFormat in = new OrcInputFormat(); FileInputFormat.setInputPaths(conf, testFilePath.toString()); InputSplit[] splits = in.getSplits(conf, 1); @@ -1145,7 +1146,7 @@ public void testDefaultTypes() throws Exception { writer.close(true); serde = new OrcSerde(); properties.setProperty("columns", "str,str2"); - serde.initialize(conf, properties); + SerDeUtils.initializeSerDe(serde, conf, properties, null); inspector = (StructObjectInspector) serde.getObjectInspector(); assertEquals("struct", inspector.getTypeName()); InputFormat in = new OrcInputFormat(); diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java index be518b9..3b56fc7 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -42,7 +43,7 @@ public void testParquetHiveSerDe() throws Throwable { final ParquetHiveSerDe serDe = new ParquetHiveSerDe(); final Configuration conf = new Configuration(); final Properties tbl = createProperties(); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); // Data final Writable[] arr = new Writable[8]; diff --git ql/src/test/queries/clientpositive/avro_partitioned.q ql/src/test/queries/clientpositive/avro_partitioned.q index 6fe5117..f33c481 100644 --- ql/src/test/queries/clientpositive/avro_partitioned.q +++ ql/src/test/queries/clientpositive/avro_partitioned.q @@ -1,4 +1,4 @@ --- verify that new joins bring in correct schemas (including evolved schemas) +-- Verify that table scans work with partitioned Avro tables CREATE TABLE episodes ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' @@ -69,5 +69,77 @@ SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 ORDER BY air_date; SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5; -- Fetch w/filter to specific partition SELECT * FROM episodes_partitioned WHERE doctor_pt = 6; --- Fetch w/non-existant partition +-- Fetch w/non-existent partition SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5; + + +-- Verify that reading from an Avro partition works +-- even if it has an old schema relative to the current table level schema + +-- Create table and store schema in SERDEPROPERTIES +CREATE TABLE episodes_partitioned_serdeproperties 
+PARTITIONED BY (doctor_pt INT) +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +WITH SERDEPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "episodes", + "type": "record", + "fields": [ + { + "name":"title", + "type":"string", + "doc":"episode title" + }, + { + "name":"air_date", + "type":"string", + "doc":"initial date" + }, + { + "name":"doctor", + "type":"int", + "doc":"main actor playing the Doctor in episode" + } + ] +}') +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'; + +-- Insert data into a partition +INSERT INTO TABLE episodes_partitioned_serdeproperties PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes; + +-- Evolve the table schema by adding new array field "cast_and_crew" +ALTER TABLE episodes_partitioned_serdeproperties +SET SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +WITH SERDEPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "episodes", + "type": "record", + "fields": [ + { + "name":"cast_and_crew", + "type":{"type":"array","items":"string"}, + "default":[] + }, + { + "name":"title", + "type":"string", + "doc":"episode title" + }, + { + "name":"air_date", + "type":"string", + "doc":"initial date" + }, + { + "name":"doctor", + "type":"int", + "doc":"main actor playing the Doctor in episode" + } + ] +}'); + +-- Try selecting from the evolved table +SELECT * FROM episodes_partitioned_serdeproperties ORDER BY air_date; diff --git ql/src/test/results/clientpositive/avro_partitioned.q.out ql/src/test/results/clientpositive/avro_partitioned.q.out index 644716d..da6ecf0 100644 --- ql/src/test/results/clientpositive/avro_partitioned.q.out +++ ql/src/test/results/clientpositive/avro_partitioned.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: -- verify that new joins bring in correct schemas (including evolved schemas) +PREHOOK: query: -- Verify that table scans work with partitioned Avro tables CREATE TABLE episodes ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' @@ -29,7 +29,7 @@ TBLPROPERTIES ('avro.schema.literal'='{ }') PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -POSTHOOK: query: -- verify that new joins bring in correct schemas (including evolved schemas) +POSTHOOK: query: -- Verify that table scans work with partitioned Avro tables CREATE TABLE episodes ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' @@ -287,12 +287,12 @@ POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [ POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] The Mysterious Planet 6 September 1986 6 6 -PREHOOK: query: -- Fetch w/non-existant partition +PREHOOK: query: -- Fetch w/non-existent partition SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@episodes_partitioned #### A masked pattern was here #### -POSTHOOK: query: -- Fetch w/non-existant partition +POSTHOOK: query: -- Fetch w/non-existent partition SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@episodes_partitioned @@ -318,3 +318,335 @@ 
POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(ep POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +PREHOOK: query: -- Verify that reading from an Avro partition works +-- even if it has an old schema relative to the current table level schema + +-- Create table and store schema in SERDEPROPERTIES +CREATE TABLE episodes_partitioned_serdeproperties +PARTITIONED BY (doctor_pt INT) +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +WITH SERDEPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "episodes", + "type": "record", + "fields": [ + { + "name":"title", + "type":"string", + "doc":"episode title" + }, + { + "name":"air_date", + "type":"string", + "doc":"initial date" + }, + { + "name":"doctor", + "type":"int", + "doc":"main actor playing the Doctor in episode" + } + ] +}') +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: -- Verify that reading from an Avro partition works +-- even if it has an old schema relative to the current table level schema + +-- Create table and store schema in SERDEPROPERTIES +CREATE TABLE episodes_partitioned_serdeproperties +PARTITIONED BY (doctor_pt INT) +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +WITH SERDEPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "episodes", + "type": "record", + "fields": [ + { + "name":"title", + "type":"string", + "doc":"episode title" + }, + { + "name":"air_date", + "type":"string", + "doc":"initial date" + }, + { + "name":"doctor", + "type":"int", + "doc":"main actor playing the Doctor in episode" + } + ] +}') +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@episodes_partitioned_serdeproperties +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from 
deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +PREHOOK: query: -- Insert data into a partition +INSERT INTO TABLE episodes_partitioned_serdeproperties PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes +PREHOOK: Output: default@episodes_partitioned_serdeproperties +POSTHOOK: query: -- Insert data into a partition +INSERT INTO TABLE episodes_partitioned_serdeproperties PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes +POSTHOOK: Output: default@episodes_partitioned_serdeproperties@doctor_pt=1 +POSTHOOK: Output: default@episodes_partitioned_serdeproperties@doctor_pt=11 +POSTHOOK: Output: default@episodes_partitioned_serdeproperties@doctor_pt=2 +POSTHOOK: Output: default@episodes_partitioned_serdeproperties@doctor_pt=4 +POSTHOOK: Output: default@episodes_partitioned_serdeproperties@doctor_pt=5 +POSTHOOK: Output: default@episodes_partitioned_serdeproperties@doctor_pt=6 +POSTHOOK: Output: 
default@episodes_partitioned_serdeproperties@doctor_pt=9 +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: 
Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=9).doctor SIMPLE 
[(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +PREHOOK: query: -- Evolve the table schema by adding new array field "cast_and_crew" +ALTER TABLE episodes_partitioned_serdeproperties +SET SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +WITH SERDEPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "episodes", + "type": "record", + "fields": [ + { + "name":"cast_and_crew", + "type":{"type":"array","items":"string"}, + "default":[] + }, + { + "name":"title", + "type":"string", + "doc":"episode title" + }, + { + "name":"air_date", + "type":"string", + "doc":"initial date" + }, + { + "name":"doctor", + "type":"int", + "doc":"main actor playing the Doctor in episode" + } + ] +}') +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@episodes_partitioned_serdeproperties +PREHOOK: Output: default@episodes_partitioned_serdeproperties +POSTHOOK: query: -- Evolve the table schema by adding new array field "cast_and_crew" +ALTER TABLE episodes_partitioned_serdeproperties +SET SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +WITH SERDEPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "episodes", + "type": "record", + "fields": [ + { + "name":"cast_and_crew", + "type":{"type":"array","items":"string"}, + "default":[] + }, + { + "name":"title", + "type":"string", + "doc":"episode title" + }, + { + "name":"air_date", + "type":"string", + "doc":"initial date" + }, + { + "name":"doctor", + "type":"int", + "doc":"main actor playing the Doctor in episode" + } + ] +}') +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@episodes_partitioned_serdeproperties +POSTHOOK: Output: default@episodes_partitioned_serdeproperties +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE 
[(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] 
+POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +PREHOOK: query: -- Try selecting from the evolved table +SELECT * FROM episodes_partitioned_serdeproperties ORDER BY air_date +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned_serdeproperties +PREHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=1 +PREHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=11 +PREHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=2 +PREHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=4 +PREHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=5 +PREHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=6 +PREHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=9 +#### A masked pattern was here #### +POSTHOOK: query: -- Try selecting from the evolved table +SELECT * FROM episodes_partitioned_serdeproperties ORDER BY air_date +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned_serdeproperties +POSTHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=1 +POSTHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=11 +POSTHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=2 +POSTHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=4 +POSTHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=5 +POSTHOOK: Input: 
default@episodes_partitioned_serdeproperties@doctor_pt=6 +POSTHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=9 +#### A masked pattern was here #### +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned 
PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, 
comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +[] The Doctor's Wife 14 May 2011 11 11 +[] An Unearthly Child 23 November 1963 1 1 +[] Rose 26 March 2005 9 9 +[] The Eleventh Hour 3 April 2010 11 11 +[] Horror of Fang Rock 3 September 1977 4 4 +[] Castrolava 4 January 1982 5 5 +[] The Power of the Daleks 5 November 1966 2 2 +[] The Mysterious Planet 6 September 1986 6 6 diff --git serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java index 1ab15a8..28cfe07 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java @@ -32,6 +32,25 @@ public abstract class AbstractSerDe implements SerDe { /** + * Initialize the SerDe. By default, this will use one set of properties, either the + * table properties or the partition properties. If a SerDe needs access to both sets, + * it should override this method. + * + * Eventually, once all SerDes have implemented this method, + * we should convert it to an abstract method. + * + * @param configuration Hadoop configuration + * @param tableProperties Table properties + * @param partitionProperties Partition properties + * @throws SerDeException + */ + public void initialize(Configuration configuration, Properties tableProperties, + Properties partitionProperties) throws SerDeException { + initialize(configuration, + SerDeUtils.createOverlayedProperties(tableProperties, partitionProperties)); + } + + /** * Initialize the HiveSerializer. * * @param conf @@ -40,6 +59,7 @@ * table properties * @throws SerDeException */ + @Deprecated public abstract void initialize(Configuration conf, Properties tbl) throws SerDeException; /** diff --git serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java index d226d21..b7fb048 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java @@ -21,9 +21,12 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Properties; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -485,6 +488,38 @@ public static boolean hasAnyNullObject(Object o, ObjectInspector oi) { } } + /** + * Returns the union of table and partition properties, + * with partition properties taking precedence. + * @param tblProps + * @param partProps + * @return the overlayed properties + */ + public static Properties createOverlayedProperties(Properties tblProps, Properties partProps) { + Properties props = new Properties(tblProps); + if (partProps != null) { + props.putAll(partProps); + } + return props; + } + + /** + * Initializes a SerDe. 
+ * @param serde + * @param tblProps + * @param partProps + * @throws SerDeException + */ + public static void initializeSerDe(Deserializer deserializer, Configuration conf, + Properties tblProps, Properties partProps) + throws SerDeException { + if (deserializer instanceof AbstractSerDe) { + ((AbstractSerDe) deserializer).initialize(conf, tblProps, partProps); + } else { + deserializer.initialize(conf, createOverlayedProperties(tblProps, partProps)); + } + } + private SerDeUtils() { // prevent instantiation } diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java index 55bfa2e..92799ed 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java @@ -46,6 +46,13 @@ private boolean badSchema = false; @Override + public void initialize(Configuration configuration, Properties tableProperties, + Properties partitionProperties) throws SerDeException { + // Avro should always use the table properties for initialization (see HIVE-6835). + initialize(configuration, tableProperties); + } + + @Override public void initialize(Configuration configuration, Properties properties) throws SerDeException { // Reset member variables so we don't get in a half-constructed state if(schema != null) { diff --git serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java index 9aa3c45..3226114 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java +++ serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java @@ -61,7 +61,7 @@ public void testLazySimpleSerDe() throws Throwable { LazySimpleSerDe serDe = new LazySimpleSerDe(); Configuration conf = new Configuration(); Properties tbl = createProperties(); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); // Data Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\tNULL"); @@ -137,7 +137,7 @@ public void testLazyBinarySerDe() throws Throwable { schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes); LazyBinarySerDe serDe = new LazyBinarySerDe(); - serDe.initialize(new Configuration(), schema); + SerDeUtils.initializeSerDe(serDe, new Configuration(), schema, null); deserializeAndSerializeLazyBinary(serDe, rows, rowOI); System.out.println("test: testLazyBinarySerDe - OK"); @@ -183,7 +183,7 @@ public void testColumnarSerDe() throws Throwable { ColumnarSerDe serDe = new ColumnarSerDe(); Configuration conf = new Configuration(); Properties tbl = createProperties(); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); // Data BytesRefArrayWritable braw = new BytesRefArrayWritable(8); diff --git serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java index a5d494f..072225d 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java +++ serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java @@ -20,6 +20,7 @@ import org.apache.avro.Schema; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; @@ -76,7 
+77,7 @@ public void initializeDoesNotReuseSchemasFromConf() throws SerDeException { AvroSerDe asd = new AvroSerDe(); - asd.initialize(conf, props); + SerDeUtils.initializeSerDe(asd, conf, props, null); // Verify that the schema now within the configuration is the one passed // in via the properties @@ -133,7 +134,7 @@ public void bothPropertiesSetToNoneReturnsErrorSchema() throws SerDeException { private void verifyErrorSchemaReturned(Properties props) throws SerDeException { AvroSerDe asd = new AvroSerDe(); - asd.initialize(new Configuration(), props); + SerDeUtils.initializeSerDe(asd, new Configuration(), props, null); assertTrue(asd.getObjectInspector() instanceof StandardStructObjectInspector); StandardStructObjectInspector oi = (StandardStructObjectInspector)asd.getObjectInspector(); List allStructFieldRefs = oi.getAllStructFieldRefs(); diff --git serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java index e512f42..cefb72e 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java +++ serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java @@ -72,7 +72,7 @@ private SerDe getSerDe(String fieldNames, String fieldTypes, String order) schema.setProperty(serdeConstants.SERIALIZATION_SORT_ORDER, order); BinarySortableSerDe serde = new BinarySortableSerDe(); - serde.initialize(new Configuration(), schema); + SerDeUtils.initializeSerDe(serde, new Configuration(), schema, null); return serde; } diff --git serde/src/test/org/apache/hadoop/hive/serde2/columnar/TestLazyBinaryColumnarSerDe.java serde/src/test/org/apache/hadoop/hive/serde2/columnar/TestLazyBinaryColumnarSerDe.java index e8639ff..cb6a0fb 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/columnar/TestLazyBinaryColumnarSerDe.java +++ serde/src/test/org/apache/hadoop/hive/serde2/columnar/TestLazyBinaryColumnarSerDe.java @@ -74,7 +74,7 @@ public void testSerDe() throws SerDeException { props.setProperty(serdeConstants.LIST_COLUMNS, cols); props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi)); LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe(); - serde.initialize(new Configuration(), props); + SerDeUtils.initializeSerDe(serde, new Configuration(), props, null); OuterStruct outerStruct = new OuterStruct(); outerStruct.mByte = 1; @@ -115,7 +115,7 @@ public void testSerDeEmpties() throws SerDeException { props.setProperty(serdeConstants.LIST_COLUMNS, cols); props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi)); LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe(); - serde.initialize(new Configuration(), props); + SerDeUtils.initializeSerDe(serde, new Configuration(), props, null); OuterStruct outerStruct = new OuterStruct(); outerStruct.mByte = 101; @@ -150,7 +150,7 @@ public void testLazyBinaryColumnarSerDeWithEmptyBinary() throws SerDeException { props.setProperty(serdeConstants.LIST_COLUMNS, cols); props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi)); LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe(); - serde.initialize(new Configuration(), props); + SerDeUtils.initializeSerDe(serde, new Configuration(), props, null); OuterStruct outerStruct = new OuterStruct(); outerStruct.mByte = 101; @@ -183,7 +183,7 @@ public void testSerDeOuterNulls() throws SerDeException { 
props.setProperty(serdeConstants.LIST_COLUMNS, cols); props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi)); LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe(); - serde.initialize(new Configuration(), props); + SerDeUtils.initializeSerDe(serde, new Configuration(), props, null); OuterStruct outerStruct = new OuterStruct(); BytesRefArrayWritable braw = (BytesRefArrayWritable) serde.serialize(outerStruct, oi); @@ -207,7 +207,7 @@ public void testSerDeInnerNulls() throws SerDeException { props.setProperty(serdeConstants.LIST_COLUMNS, cols); props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi)); LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe(); - serde.initialize(new Configuration(), props); + SerDeUtils.initializeSerDe(serde, new Configuration(), props, null); OuterStruct outerStruct = new OuterStruct(); outerStruct.mByte = 1; @@ -273,7 +273,7 @@ public void testHandlingAlteredSchemas() throws SerDeException { // serialize some data in the schema before it is altered. LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe(); - serde.initialize(new Configuration(), props); + SerDeUtils.initializeSerDe(serde, new Configuration(), props, null); BeforeStruct bs1 = new BeforeStruct(); bs1.l1 = 1L; @@ -291,7 +291,7 @@ public void testHandlingAlteredSchemas() throws SerDeException { props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi)); serde = new LazyBinaryColumnarSerDe(); - serde.initialize(new Configuration(), props); + SerDeUtils.initializeSerDe(serde, new Configuration(), props, null); // serialize some data in the schema after it is altered. AfterStruct as = new AfterStruct(); @@ -303,7 +303,7 @@ public void testHandlingAlteredSchemas() throws SerDeException { // fetch operator serde = new LazyBinaryColumnarSerDe(); - serde.initialize(new Configuration(), props); + SerDeUtils.initializeSerDe(serde, new Configuration(), props, null); //fetch the row inserted before schema is altered and verify LazyBinaryColumnarStruct struct1 = (LazyBinaryColumnarStruct) serde diff --git serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyArrayMapStruct.java serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyArrayMapStruct.java index 714045b..ac0b583 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyArrayMapStruct.java +++ serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyArrayMapStruct.java @@ -585,7 +585,7 @@ private void testNestedinArrayAtLevel(int nestingLevel, Configuration conf = new Configuration(); tableProp.setProperty("columns", "narray"); tableProp.setProperty("columns.types", schema.toString()); - serDe.initialize(conf, tableProp); + SerDeUtils.initializeSerDe(serDe, conf, tableProp, null); //create the serialized string for type byte[] separators = serDe.serdeParams.getSeparators(); diff --git serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java index 28eb868..cf86973 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java +++ serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java @@ -26,6 +26,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.io.ByteWritable; import 
org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -57,7 +58,7 @@ public void testLazySimpleSerDe() throws Throwable { tbl.setProperty("columns.types", "tinyint:smallint:int:bigint:double:string:int:string:binary"); tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL"); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); // Data Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\tNULL\t"); @@ -124,7 +125,7 @@ public void testLazySimpleSerDeLastColumnTakesRest() throws Throwable { Configuration conf = new Configuration(); Properties tbl = createProperties(); tbl.setProperty(serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST, "true"); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); // Data Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t"); @@ -152,7 +153,7 @@ public void testLazySimpleSerDeExtraColumns() throws Throwable { LazySimpleSerDe serDe = new LazySimpleSerDe(); Configuration conf = new Configuration(); Properties tbl = createProperties(); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); // Data Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t"); @@ -180,7 +181,7 @@ public void testLazySimpleSerDeMissingColumns() throws Throwable { LazySimpleSerDe serDe = new LazySimpleSerDe(); Configuration conf = new Configuration(); Properties tbl = createProperties(); - serDe.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); // Data Text t = new Text("123\t456\t789\t1000\t5.3\t"); diff --git serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java index 69c891d..02ae6f8 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java +++ serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java @@ -97,7 +97,7 @@ private SerDe getSerDe(String fieldNames, String fieldTypes) throws Throwable { schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes); LazyBinarySerDe serde = new LazyBinarySerDe(); - serde.initialize(new Configuration(), schema); + SerDeUtils.initializeSerDe(serde, new Configuration(), schema, null); return serde; } diff --git serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestCrossMapEqualComparer.java serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestCrossMapEqualComparer.java index a69fcb7..c58c427 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestCrossMapEqualComparer.java +++ serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestCrossMapEqualComparer.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.ByteStream; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; @@ -100,7 +101,7 @@ public void testCompatibleType() throws SerDeException, IOException { tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi1)); SerDeParameters serdeParams = LazySimpleSerDe.initSerdeParams(conf, tbl, LazySimpleSerDe.class.getName()); - 
serde.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serde, conf, tbl, null); ObjectInspector oi2 = serde.getObjectInspector(); Object o2 = serializeAndDeserialize(o1, oi1, serde, serdeParams); @@ -154,7 +155,7 @@ public void testIncompatibleType() throws SerDeException, IOException { tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi1)); SerDeParameters serdeParams = LazySimpleSerDe.initSerdeParams(conf, tbl, LazySimpleSerDe.class.getName()); - serde.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serde, conf, tbl, null); ObjectInspector oi2 = serde.getObjectInspector(); Object o2 = serializeAndDeserialize(o1, oi1, serde, serdeParams); diff --git serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestSimpleMapEqualComparer.java serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestSimpleMapEqualComparer.java index dd9610e..6f09b83 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestSimpleMapEqualComparer.java +++ serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestSimpleMapEqualComparer.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.ByteStream; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; @@ -100,7 +101,7 @@ public void testCompatibleType() throws SerDeException, IOException { tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi1)); SerDeParameters serdeParams = LazySimpleSerDe.initSerdeParams(conf, tbl, LazySimpleSerDe.class.getName()); - serde.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serde, conf, tbl, null); ObjectInspector oi2 = serde.getObjectInspector(); Object o2 = serializeAndDeserialize(o1, oi1, serde, serdeParams); @@ -154,7 +155,7 @@ public void testIncompatibleType() throws SerDeException, IOException { tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi1)); SerDeParameters serdeParams = LazySimpleSerDe.initSerdeParams(conf, tbl, LazySimpleSerDe.class.getName()); - serde.initialize(conf, tbl); + SerDeUtils.initializeSerDe(serde, conf, tbl, null); ObjectInspector oi2 = serde.getObjectInspector(); Object o2 = serializeAndDeserialize(o1, oi1, serde, serdeParams); diff --git service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java index 2a113d5..5e7ee93 100644 --- service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java +++ service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java @@ -388,7 +388,7 @@ private SerDe getSerDe() throws SQLException { LOG.debug("Column types: " + types); props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types); } - serde.initialize(new HiveConf(), props); + SerDeUtils.initializeSerDe(serde, new HiveConf(), props, null); } catch (Exception ex) { ex.printStackTrace();
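Taken together, the new API works as follows: SerDeUtils.initializeSerDe checks whether the Deserializer is an AbstractSerDe and, if so, calls the new three-argument initialize(conf, tableProperties, partitionProperties); otherwise it falls back to the deprecated two-argument initialize with the table and partition properties overlaid via createOverlayedProperties, where partition properties take precedence. AbstractSerDe's default three-argument implementation performs the same overlay, while AvroSerDe overrides it to ignore the partition properties and always initialize from the table-level schema (see HIVE-6835), which is what lets the evolved-schema partition test in avro_partitioned.q read correctly. A small sketch of the overlay semantics follows; the property values are illustrative placeholders, not taken from the patch.

import java.util.Properties;

import org.apache.hadoop.hive.serde2.SerDeUtils;

public class OverlayedPropertiesExample {
  public static void main(String[] args) {
    Properties tbl = new Properties();
    tbl.setProperty("avro.schema.literal", "<current table-level schema>");
    tbl.setProperty("columns", "title,air_date,doctor");

    Properties part = new Properties();
    // An older partition still carries the schema it was written with.
    part.setProperty("avro.schema.literal", "<older partition schema>");

    Properties merged = SerDeUtils.createOverlayedProperties(tbl, part);
    // The partition value wins for keys present in both sets...
    System.out.println(merged.getProperty("avro.schema.literal")); // older partition schema
    // ...and table-only keys remain visible through the defaults chain.
    System.out.println(merged.getProperty("columns")); // title,air_date,doctor
  }
}

This overlay behavior is also why the two-argument path was insufficient for Avro: a partition's stale avro.schema.literal would take precedence over the evolved table schema, whereas the three-argument override lets AvroSerDe pick the table properties explicitly.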