diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index f8fb475..679cec1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -152,7 +152,7 @@ public RexNode convert(ExprNodeDesc expr) throws SemanticException { private RexNode convert(final ExprNodeFieldDesc fieldDesc) throws SemanticException { RexNode rexNode = convert(fieldDesc.getDesc()); - if (rexNode instanceof RexCall) { + if (rexNode.getType().isStruct()) { // regular case of accessing nested field in a column return cluster.getRexBuilder().makeFieldAccess(rexNode, fieldDesc.getFieldName(), true); } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java index 9d7307e..c58210b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java @@ -27,10 +27,6 @@ import java.util.Map; import java.util.Set; -import com.google.common.annotations.VisibleForTesting; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.StrictChecks; @@ -55,6 +51,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDefaultDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; @@ -63,10 +60,13 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; /** * The transformation step that does partition pruning. @@ -372,6 +372,12 @@ static ExprNodeDesc compactExpr(ExprNodeDesc expr) { */ static private ExprNodeDesc removeNonPartCols(ExprNodeDesc expr, List partCols, Set referred) { + if (expr instanceof ExprNodeFieldDesc) { + // Column is not a partition column for the table, + // as we do not allow partitions based on complex + // list or struct fields. 
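+          // Folding the reference to a typed null constant marks this
+          // subexpression as unknown, so compactExpr keeps pruning
+          // conservative instead of dropping partitions incorrectly.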
+ return new ExprNodeConstantDesc(expr.getTypeInfo(), null); + } if (expr instanceof ExprNodeColumnDesc) { String column = ((ExprNodeColumnDesc) expr).getColumn(); if (!partCols.contains(column)) { diff --git ql/src/test/queries/clientpositive/row__id.q ql/src/test/queries/clientpositive/row__id.q new file mode 100644 index 0000000..a24219b --- /dev/null +++ ql/src/test/queries/clientpositive/row__id.q @@ -0,0 +1,22 @@ +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +drop table if exists hello_acid; +create table hello_acid (key int, value int) +partitioned by (load_date date) +clustered by(key) into 3 buckets +stored as orc tblproperties ('transactional'='true'); + +insert into hello_acid partition (load_date='2016-03-01') values (1, 1); +insert into hello_acid partition (load_date='2016-03-02') values (2, 2); +insert into hello_acid partition (load_date='2016-03-03') values (3, 3); + +explain +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid; + +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid; + +explain +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 1; + +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 1; + diff --git ql/src/test/results/clientpositive/llap/vector_complex_all.q.out ql/src/test/results/clientpositive/llap/vector_complex_all.q.out index f16bb16..565f9ab 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_all.q.out @@ -211,7 +211,7 @@ POSTHOOK: query: SELECT strct.B, count(val) FROM orc_create_complex GROUP BY str POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_create_complex #### A masked pattern was here #### -strct.b _c1 +strct.b c1 four 4501 six 4501 two 4501 diff --git ql/src/test/results/clientpositive/nested_column_pruning.q.out ql/src/test/results/clientpositive/nested_column_pruning.q.out index 884d050..f01e3ea 100644 --- ql/src/test/results/clientpositive/nested_column_pruning.q.out +++ ql/src/test/results/clientpositive/nested_column_pruning.q.out @@ -465,7 +465,7 @@ STAGE PLANS: alias: nested_tbl_1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((s1.f2 = 'foo') and (size(s2.f8.f10) > 1) and (s2.f8.f11['key1'] = true)) (type: boolean) + predicate: ((s1.f2 = 'foo') and (size(s2.f8.f10) > 1) and s2.f8.f11['key1']) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s2.f8 (type: struct,f11:map>) @@ -705,12 +705,12 @@ STAGE PLANS: alias: nested_tbl_1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: s1 (type: struct,f6:int>) - outputColumnNames: s1 + expressions: s1.f3.f5 (type: double), s1.f3.f4 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(s1.f3.f4) - keys: s1.f3.f5 (type: double) + aggregations: count(_col1) + keys: _col0 (type: double) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -766,12 +766,12 @@ STAGE PLANS: alias: nested_tbl_1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: s1 (type: struct,f6:int>) - outputColumnNames: s1 + expressions: s1.f3 (type: struct), 
s1.f3.f4 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(s1.f3.f4) - keys: s1.f3 (type: struct) + aggregations: count(_col1) + keys: _col0 (type: struct) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -828,12 +828,12 @@ STAGE PLANS: alias: nested_tbl_1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: s1 (type: struct,f6:int>) - outputColumnNames: s1 + expressions: s1.f3 (type: struct), s1.f3.f4 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(s1.f3.f4) - keys: s1.f3 (type: struct) + aggregations: count(_col1) + keys: _col0 (type: struct) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -919,38 +919,43 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: s1.f3.f4 is not null (type: boolean) + Select Operator + expressions: s1 (type: struct,f6:int>) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: s1.f3.f4 (type: int) + key expressions: _col0.f3.f4 (type: int) sort order: + - Map-reduce partition columns: s1.f3.f4 (type: int) + Map-reduce partition columns: _col0.f3.f4 (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: s1 (type: struct,f6:int>) + value expressions: _col0 (type: struct,f6:int>) TableScan alias: t2 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (s1.f6 is not null and (s2.f8.f9 = false)) (type: boolean) + predicate: (s2.f8.f9 = false) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: s1.f6 (type: int) - sort order: + - Map-reduce partition columns: s1.f6 (type: int) + Select Operator + expressions: s1 (type: struct,f6:int>), s2 (type: struct,f11:map>>) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: s2 (type: struct,f11:map>>) + Reduce Output Operator + key expressions: _col0.f6 (type: int) + sort order: + + Map-reduce partition columns: _col0.f6 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct,f11:map>>) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 s1.f3.f4 (type: int) - 1 s1.f6 (type: int) - outputColumnNames: _col1, _col9 + 0 _col0.f3.f4 (type: int) + 1 _col0.f6 (type: int) + outputColumnNames: _col0, _col2 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1.f3.f5 (type: double), _col9.f8 (type: struct,f11:map>) + expressions: _col0.f3.f5 (type: double), _col2.f8 (type: struct,f11:map>) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1005,38 +1010,43 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: s1.f3.f4 is 
not null (type: boolean) + Select Operator + expressions: s1 (type: struct,f6:int>) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: s1.f3.f4 (type: int) + key expressions: _col0.f3.f4 (type: int) sort order: + - Map-reduce partition columns: s1.f3.f4 (type: int) + Map-reduce partition columns: _col0.f3.f4 (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: s1 (type: struct,f6:int>) + value expressions: _col0 (type: struct,f6:int>) TableScan alias: t2 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (s1.f6 is not null and (s2.f8.f9 = true)) (type: boolean) + predicate: (s2.f8.f9 = true) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: s1.f6 (type: int) - sort order: + - Map-reduce partition columns: s1.f6 (type: int) + Select Operator + expressions: s1 (type: struct,f6:int>), s2 (type: struct,f11:map>>) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: s2 (type: struct,f11:map>>) + Reduce Output Operator + key expressions: _col0.f6 (type: int) + sort order: + + Map-reduce partition columns: _col0.f6 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct,f11:map>>) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 s1.f3.f4 (type: int) - 1 s1.f6 (type: int) - outputColumnNames: _col1, _col9 + 0 _col0.f3.f4 (type: int) + 1 _col0.f6 (type: int) + outputColumnNames: _col0, _col2 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1.f3.f5 (type: double), _col9.f8 (type: struct,f11:map>) + expressions: _col0.f3.f5 (type: double), _col2.f8 (type: struct,f11:map>) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/row__id.q.out ql/src/test/results/clientpositive/row__id.q.out new file mode 100644 index 0000000..2289883 --- /dev/null +++ ql/src/test/results/clientpositive/row__id.q.out @@ -0,0 +1,156 @@ +PREHOOK: query: drop table if exists hello_acid +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists hello_acid +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table hello_acid (key int, value int) +partitioned by (load_date date) +clustered by(key) into 3 buckets +stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hello_acid +POSTHOOK: query: create table hello_acid (key int, value int) +partitioned by (load_date date) +clustered by(key) into 3 buckets +stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hello_acid +PREHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1) +PREHOOK: type: QUERY +PREHOOK: Output: default@hello_acid@load_date=2016-03-01 +POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).key EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2) +PREHOOK: type: QUERY +PREHOOK: Output: default@hello_acid@load_date=2016-03-02 +POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3) +PREHOOK: type: QUERY +PREHOOK: Output: default@hello_acid@load_date=2016-03-03 +POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@hello_acid@load_date=2016-03-03 +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).value EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: explain +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 1 Data size: 2902 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ROW__ID.transactionid (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 2902 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 2902 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 2902 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2902 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked 
pattern was here #### +POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +1 +2 +3 +PREHOOK: query: explain +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 1 Data size: 2902 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ROW__ID.transactionid = 1) (type: boolean) + Statistics: Num rows: 1 Data size: 2902 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ROW__ID.transactionid (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 2902 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2902 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +1 diff --git ql/src/test/results/clientpositive/vector_complex_all.q.out ql/src/test/results/clientpositive/vector_complex_all.q.out index 7ce707a..69d5576 100644 --- ql/src/test/results/clientpositive/vector_complex_all.q.out +++ ql/src/test/results/clientpositive/vector_complex_all.q.out @@ -211,7 +211,7 @@ POSTHOOK: query: SELECT strct.B, count(val) FROM orc_create_complex GROUP BY str POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_create_complex #### A masked pattern was here #### -strct.b _c1 +strct.b c1 four 4501 six 4501 two 4501
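
Note on the RexNodeConverter change: the virtual ROW__ID column reaches the
converter as a struct-typed RexInputRef, not a RexCall, so the old instanceof
test sent a field access like row__id.transactionid into the else branch,
which is not meant for struct field access over a plain column reference.
Keying on the operand's type admits any struct-typed expression. The
standalone sketch below assumes only Calcite on the classpath; FieldAccessDemo
is illustrative (not Hive code) and models ROW__ID with the
struct<transactionid:bigint,bucketid:int,rowid:bigint> shape the q.out files
above rely on.

    import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
    import org.apache.calcite.rel.type.RelDataType;
    import org.apache.calcite.rel.type.RelDataTypeFactory;
    import org.apache.calcite.rex.RexBuilder;
    import org.apache.calcite.rex.RexCall;
    import org.apache.calcite.rex.RexNode;
    import org.apache.calcite.sql.type.SqlTypeName;

    public class FieldAccessDemo {
        public static void main(String[] args) {
            RelDataTypeFactory typeFactory = new JavaTypeFactoryImpl();
            RexBuilder rexBuilder = new RexBuilder(typeFactory);

            // Model ROW__ID as the q.out plans show it:
            // struct<transactionid:bigint,bucketid:int,rowid:bigint>.
            RelDataType rowIdType = typeFactory.builder()
                    .add("transactionid", SqlTypeName.BIGINT)
                    .add("bucketid", SqlTypeName.INTEGER)
                    .add("rowid", SqlTypeName.BIGINT)
                    .build();

            // A direct column reference, as the converter produces for ROW__ID.
            RexNode rowId = rexBuilder.makeInputRef(rowIdType, 0);

            System.out.println(rowId instanceof RexCall);    // false: old check rejected it
            System.out.println(rowId.getType().isStruct());  // true:  new check accepts it

            // Field access is legal on any struct-typed operand.
            RexNode tid = rexBuilder.makeFieldAccess(rowId, "transactionid", true);
            System.out.println(tid + " : " + tid.getType()); // e.g. $0.transactionid : BIGINT
        }
    }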
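
Note on the PartitionPruner change: removeNonPartCols now folds any
ExprNodeFieldDesc to a typed null constant, since Hive does not allow
partitioning on struct or list fields, so a field reference can never name a
partition column. compactExpr then applies its usual unknown-value handling:
an unknown conjunct is dropped from an AND, while an unknown disjunct makes
the whole OR unknown, so pruning can only retain extra partitions, never drop
a needed one (as the row__id test shows, all three load_date partitions remain
inputs for the tid = 1 query). PrunerFoldingDemo below is a toy model of that
folding in plain Java with hypothetical names, not Hive code.

    import java.util.Arrays;
    import java.util.List;
    import java.util.Optional;

    public class PrunerFoldingDemo {

        // Optional.empty() stands in for ExprNodeConstantDesc(type, null): "unknown".
        static Optional<Boolean> and(List<Optional<Boolean>> children) {
            Optional<Boolean> result = Optional.empty();
            for (Optional<Boolean> c : children) {
                if (c.isPresent()) {
                    // Known conjuncts still participate in pruning.
                    result = Optional.of(result.orElse(true) && c.get());
                }
                // Unknown conjuncts are dropped (treated as true for pruning).
            }
            return result; // an all-unknown AND stays unknown
        }

        static Optional<Boolean> or(List<Optional<Boolean>> children) {
            for (Optional<Boolean> c : children) {
                if (!c.isPresent()) {
                    return Optional.empty(); // one unknown disjunct poisons the OR
                }
            }
            return Optional.of(children.stream().anyMatch(Optional::get));
        }

        public static void main(String[] args) {
            // load_date = '2016-03-01' AND row__id.transactionid = 1
            // -> the field access folds to unknown, the partition test survives:
            System.out.println(and(Arrays.asList(Optional.of(true), Optional.empty()))); // Optional[true]

            // row__id.transactionid = 1 OR load_date = '2016-03-01'
            // -> the whole disjunction is unknown, so no partition is eliminated:
            System.out.println(or(Arrays.asList(Optional.empty(), Optional.of(true)))); // Optional.empty
        }
    }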