diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index 28d4789..52ce569 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -49,6 +49,7 @@
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.ql.stats.StatsCollectionContext;
 import org.apache.hadoop.hive.ql.stats.StatsPublisher;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeStats;
 import org.apache.hadoop.hive.serde2.Serializer;
@@ -355,7 +356,7 @@ protected void initializeOp(Configuration hconf) throws HiveException {
     parent = Utilities.toTempPath(conf.getDirName());
     statsFromRecordWriter = new boolean[numFiles];
     serializer = (Serializer) conf.getTableInfo().getDeserializerClass().newInstance();
-    serializer.initialize(hconf, conf.getTableInfo().getProperties());
+    serializer.initialize(clearNestedColumnPaths(hconf), conf.getTableInfo().getProperties());
     outputClass = serializer.getSerializedClass();
 
     if (isLogInfoEnabled) {
@@ -1288,4 +1289,17 @@ private void publishStats() throws HiveException {
     }
     return new String[] {fspKey, null};
   }
+
+  /**
+   * Check if nested column paths is set for 'conf'.
+   * If set, create a copy of 'conf' with this property unset.
+   */
+  private Configuration clearNestedColumnPaths(Configuration conf) {
+    if (conf.get(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR) != null) {
+      Configuration confCopy = new Configuration(conf);
+      confCopy.unset(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR);
+      return confCopy;
+    }
+    return conf;
+  }
 }
diff --git ql/src/test/queries/clientpositive/nested_column_pruning.q ql/src/test/queries/clientpositive/nested_column_pruning.q
index 28b974e..1ff3b7b 100644
--- ql/src/test/queries/clientpositive/nested_column_pruning.q
+++ ql/src/test/queries/clientpositive/nested_column_pruning.q
@@ -1,4 +1,5 @@
 set hive.fetch.task.conversion = none;
+set hive.exec.dynamic.partition.mode=nonstrict;
 
 -- First, create source tables
 DROP TABLE IF EXISTS dummy;
@@ -110,3 +111,14 @@ SELECT t1.s1.f3.f5, t2.s2.f8
 FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2
 ON t1.s1.f3.f4 = t2.s1.f6
 WHERE t2.s2.f8.f9 == TRUE;
+
+-- Testing insert with aliases
+
+DROP TABLE IF EXISTS nested_tbl_3;
+CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS PARQUET;
+
+INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3)
+SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3
+FROM nested_tbl_1;
+
+SELECT * FROM nested_tbl_3;
diff --git ql/src/test/results/clientpositive/nested_column_pruning.q.out ql/src/test/results/clientpositive/nested_column_pruning.q.out
index f01e3ea..586f115 100644
--- ql/src/test/results/clientpositive/nested_column_pruning.q.out
+++ ql/src/test/results/clientpositive/nested_column_pruning.q.out
@@ -1078,3 +1078,44 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@nested_tbl_1
 #### A masked pattern was here ####
 5.0	{"f9":true,"f10":[10,11],"f11":{"key1":true,"key2":false}}
+PREHOOK: query: -- Testing insert with aliases
+
+DROP TABLE IF EXISTS nested_tbl_3
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- Testing insert with aliases
+
+DROP TABLE IF EXISTS nested_tbl_3
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@nested_tbl_3
+POSTHOOK: query: CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@nested_tbl_3
+PREHOOK: query: INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3)
+SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3
+FROM nested_tbl_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@nested_tbl_1
+PREHOOK: Output: default@nested_tbl_3
+POSTHOOK: query: INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3)
+SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3
+FROM nested_tbl_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@nested_tbl_1
+POSTHOOK: Output: default@nested_tbl_3@f3=4
+POSTHOOK: Lineage: nested_tbl_3 PARTITION(f3=4).f1 EXPRESSION [(nested_tbl_1)nested_tbl_1.FieldSchema(name:s1, type:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, comment:null), ]
+POSTHOOK: Lineage: nested_tbl_3 PARTITION(f3=4).f2 EXPRESSION [(nested_tbl_1)nested_tbl_1.FieldSchema(name:s1, type:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, comment:null), ]
+PREHOOK: query: SELECT * FROM nested_tbl_3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@nested_tbl_3
+PREHOOK: Input: default@nested_tbl_3@f3=4
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM nested_tbl_3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@nested_tbl_3
+POSTHOOK: Input: default@nested_tbl_3@f3=4
+#### A masked pattern was here ####
+false	foo	4