diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index f8fb475..38b2c02 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -36,6 +36,7 @@ import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlCollation; @@ -152,7 +153,7 @@ public RexNode convert(ExprNodeDesc expr) throws SemanticException { private RexNode convert(final ExprNodeFieldDesc fieldDesc) throws SemanticException { RexNode rexNode = convert(fieldDesc.getDesc()); - if (rexNode instanceof RexCall) { + if (rexNode instanceof RexCall || rexNode instanceof RexInputRef) { // regular case of accessing nested field in a column return cluster.getRexBuilder().makeFieldAccess(rexNode, fieldDesc.getFieldName(), true); } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java index 9d7307e..c58210b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java @@ -27,10 +27,6 @@ import java.util.Map; import java.util.Set; -import com.google.common.annotations.VisibleForTesting; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.StrictChecks; @@ -55,6 +51,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDefaultDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; @@ -63,10 +60,13 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; /** * The transformation step that does partition pruning. @@ -372,6 +372,12 @@ static ExprNodeDesc compactExpr(ExprNodeDesc expr) { */ static private ExprNodeDesc removeNonPartCols(ExprNodeDesc expr, List partCols, Set referred) { + if (expr instanceof ExprNodeFieldDesc) { + // Column is not a partition column for the table, + // as we do not allow partitions based on complex + // list or struct fields. + return new ExprNodeConstantDesc(expr.getTypeInfo(), null); + } if (expr instanceof ExprNodeColumnDesc) { String column = ((ExprNodeColumnDesc) expr).getColumn(); if (!partCols.contains(column)) { diff --git ql/src/test/queries/clientpositive/row__id.q ql/src/test/queries/clientpositive/row__id.q new file mode 100644 index 0000000..1d7ce9e --- /dev/null +++ ql/src/test/queries/clientpositive/row__id.q @@ -0,0 +1,21 @@ +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +drop table if exists hello_acid; +create table hello_acid (key int, value int) +partitioned by (load_date date) +clustered by(key) into 3 buckets +stored as orc tblproperties ('transactional'='true'); + +insert into hello_acid partition (load_date='2016-03-01') values (1, 1); +insert into hello_acid partition (load_date='2016-03-02') values (2, 2); +insert into hello_acid partition (load_date='2016-03-03') values (3, 3); + +explain +select tid from (select row__id.transactionid as tid from hello_acid) sub; + +select tid from (select row__id.transactionid as tid from hello_acid) sub; + +explain +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 1; + +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 1; diff --git ql/src/test/results/clientpositive/row__id.q.out ql/src/test/results/clientpositive/row__id.q.out new file mode 100644 index 0000000..6d2a5dd --- /dev/null +++ ql/src/test/results/clientpositive/row__id.q.out @@ -0,0 +1,147 @@ +PREHOOK: query: drop table if exists hello_acid +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists hello_acid +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table hello_acid (key int, value int) +partitioned by (load_date date) +clustered by(key) into 3 buckets +stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hello_acid +POSTHOOK: query: create table hello_acid (key int, value int) +partitioned by (load_date date) +clustered by(key) into 3 buckets +stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hello_acid +PREHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1) +PREHOOK: type: QUERY +PREHOOK: Output: default@hello_acid@load_date=2016-03-01 +POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2) +PREHOOK: type: QUERY +PREHOOK: Output: default@hello_acid@load_date=2016-03-02 +POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3) +PREHOOK: type: QUERY +PREHOOK: Output: default@hello_acid@load_date=2016-03-03 +POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@hello_acid@load_date=2016-03-03 +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).value EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: explain +select tid from (select row__id.transactionid as tid from hello_acid) sub +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tid from (select row__id.transactionid as tid from hello_acid) sub +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 1 Data size: 2902 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ROW__ID.transactionid (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 2902 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2902 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +2 +3 +1 +PREHOOK: query: explain +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 1 Data size: 2902 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ROW__ID.transactionid = 1) (type: boolean) + Statistics: Num rows: 1 Data size: 2902 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ROW__ID.transactionid (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 2902 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2902 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +1