diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
index bd610d9..deb5732 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
@@ -62,8 +62,11 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
 import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveWritableObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
@@ -513,9 +516,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
           return null;
         }
         // It's a column.
-        return new ExprNodeColumnDesc(colInfo.getType(), colInfo
-            .getInternalName(), colInfo.getTabAlias(), colInfo
-            .getIsVirtualCol());
+        return toExprNodeDesc(colInfo);
       } else {
         // It's a table alias.
         // We will process that later in DOT.
@@ -547,11 +548,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         }
       } else {
         // It's a column.
-        ExprNodeColumnDesc exprNodColDesc = new ExprNodeColumnDesc(colInfo.getType(), colInfo
-            .getInternalName(), colInfo.getTabAlias(), colInfo
-            .getIsVirtualCol());
-        exprNodColDesc.setSkewedCol(colInfo.isSkewedCol());
-        return exprNodColDesc;
+        return toExprNodeDesc(colInfo);
       }
     }
 
@@ -559,6 +556,24 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
 
   }
 
+  private static ExprNodeDesc toExprNodeDesc(ColumnInfo colInfo) {
+    ObjectInspector inspector = colInfo.getObjectInspector();
+    if (inspector instanceof ConstantObjectInspector) {
+      if (!(inspector instanceof AbstractPrimitiveWritableObjectInspector)) {
+        // should not happen: every constant OI that reaches here is a writable OI
+        throw new IllegalArgumentException("All constant OIs here are writable OIs");
+      }
+      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) inspector;
+      Object constant = ((ConstantObjectInspector) inspector).getWritableConstantValue();
+      return new ExprNodeConstantDesc(colInfo.getType(), poi.getPrimitiveJavaObject(constant));
+    }
+    ExprNodeColumnDesc column = new ExprNodeColumnDesc(colInfo.getType(), colInfo
+        .getInternalName(), colInfo.getTabAlias(), colInfo
+        .getIsVirtualCol());
+    column.setSkewedCol(colInfo.isSkewedCol());
+    return column;
+  }
+
   /**
    * Factory method to get ColumnExprProcessor.
    *
@@ -979,7 +994,7 @@ private boolean isDescendant(Node ans, Node des) {
       return false;
     }
 
-    protected ExprNodeColumnDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr,
+    protected ExprNodeDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr,
         Object... nodeOutputs) throws SemanticException {
       RowResolver input = ctx.getInputRR();
       String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0)
@@ -993,8 +1008,7 @@ protected ExprNodeColumnDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode ex
         ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr);
         return null;
       }
-      return new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(),
-          colInfo.getTabAlias(), colInfo.getIsVirtualCol());
+      return toExprNodeDesc(colInfo);
     }
 
     @Override
@@ -1080,16 +1094,14 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
           for (Map.Entry<String, ColumnInfo> colMap : columns.entrySet()) {
             ColumnInfo colInfo = colMap.getValue();
             if (!colInfo.getIsVirtualCol()) {
-              columnList.addColumn(new ExprNodeColumnDesc(colInfo.getType(),
-                  colInfo.getInternalName(), colInfo.getTabAlias(), false));
+              columnList.addColumn(toExprNodeDesc(colInfo));
             }
           }
         } else {
           // all columns (select *, for example)
           for (ColumnInfo colInfo : input.getColumnInfos()) {
             if (!colInfo.getIsVirtualCol()) {
-              columnList.addColumn(new ExprNodeColumnDesc(colInfo.getType(),
-                  colInfo.getInternalName(), colInfo.getTabAlias(), false));
+              columnList.addColumn(toExprNodeDesc(colInfo));
             }
           }
         }
@@ -1127,7 +1139,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
           expr.getChildCount() - childrenBegin);
       for (int ci = childrenBegin; ci < expr.getChildCount(); ci++) {
         if (nodeOutputs[ci] instanceof ExprNodeColumnListDesc) {
-          children.addAll(((ExprNodeColumnListDesc)nodeOutputs[ci]).getChildren());
+          children.addAll(((ExprNodeColumnListDesc) nodeOutputs[ci]).getChildren());
         } else {
           children.add((ExprNodeDesc) nodeOutputs[ci]);
         }
@@ -1142,8 +1154,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         RowResolver input = ctx.getInputRR();
         for (ColumnInfo colInfo : input.getColumnInfos()) {
           if (!colInfo.getIsVirtualCol()) {
-            children.add(new ExprNodeColumnDesc(colInfo.getType(),
-                colInfo.getInternalName(), colInfo.getTabAlias(), false));
+            children.add(toExprNodeDesc(colInfo));
           }
         }
       }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java
index f8738cd..be78e73 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java
@@ -30,16 +30,17 @@
  */
 public class ExprNodeColumnListDesc extends ExprNodeDesc {
 
-  List<ExprNodeColumnDesc> columns = new ArrayList<ExprNodeColumnDesc>();
+  // column or constant
+  final List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>();
 
-  public void addColumn(ExprNodeColumnDesc column) {
+  public void addColumn(ExprNodeDesc column) {
     columns.add(column);
   }
 
   @Override
   public ExprNodeDesc clone() {
     ExprNodeColumnListDesc clone = new ExprNodeColumnListDesc();
-    clone.columns = new ArrayList<ExprNodeColumnDesc>(columns);
+    clone.columns.addAll(columns);
     return clone;
   }
 
@@ -73,11 +74,7 @@ public String getTypeString() {
 
   @Override
   public List<String> getCols() {
-    List<String> cols = new ArrayList<String>();
-    for (ExprNodeColumnDesc column : columns) {
-      cols.add(column.getColumn());
-    }
-    return cols;
+    throw new IllegalStateException("not expected to be called");
   }
 
   @Override
diff --git ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q
index 1efa295..1ef81f7 100644
--- ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q
+++ ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q
@@ -84,3 +84,8 @@ select * from t9;
 select * from t10;
 select * from t11;
 select * from t12;
+
+-- with CBO
+explain
+select percentile_approx(key, 0.5) from bucket;
+select percentile_approx(key, 0.5) from bucket;
diff --git ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out
index b2c0281..3a981e7 100644
--- ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out
+++ ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out
@@ -504,3 +504,68 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t12
 #### A masked pattern was here ####
 [26.0,255.5,479.0,491.0]
+PREHOOK: query: -- with CBO
+explain
+select percentile_approx(key, 0.5) from bucket
+PREHOOK: type: QUERY
+POSTHOOK: query: -- with CBO
+explain
+select percentile_approx(key, 0.5) from bucket
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: bucket
+            Statistics: Num rows: 726 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: double)
+              outputColumnNames: _col0
+              Statistics: Num rows: 726 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: percentile_approx(_col0, 0.5)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  value expressions: _col0 (type: array<double>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: percentile_approx(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: double)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select percentile_approx(key, 0.5) from bucket
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket
+#### A masked pattern was here ####
+POSTHOOK: query: select percentile_approx(key, 0.5) from bucket
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket
+#### A masked pattern was here ####
+255.5
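
Context for reviewers: under CBO, a literal such as the 0.5 in percentile_approx(key, 0.5) can come back to the type checker as a column whose ObjectInspector is a writable ConstantObjectInspector. The new toExprNodeDesc() folds such a column back into an ExprNodeConstantDesc, so UDAFs that require a constant argument (percentile_approx among them) still see one. The standalone sketch below reproduces that conversion outside the type checker; it is illustrative only, the class name is invented, and it assumes the Hive 0.14-era serde2 factory APIs.

    import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
    import org.apache.hadoop.hive.serde2.io.DoubleWritable;
    import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class ConstantColumnFoldingSketch {
      public static void main(String[] args) {
        // The literal 0.5 as CBO hands it back: wrapped in a writable constant OI.
        ConstantObjectInspector constOI =
            PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
                TypeInfoFactory.doubleTypeInfo, new DoubleWritable(0.5));

        // Unwrap the writable into its Java value, as toExprNodeDesc() now does.
        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) constOI;
        Object value = poi.getPrimitiveJavaObject(constOI.getWritableConstantValue());

        // Rebuild the constant expression, so the UDAF again sees a genuine
        // constant as its second argument rather than a column reference.
        ExprNodeConstantDesc constant =
            new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, value);
        System.out.println(constant.getExprString());
      }
    }

The queries added to udaf_percentile_approx_23.q pin this behavior down: with CBO enabled, the explain plan must still show percentile_approx(_col0, 0.5) with a literal 0.5, and the query must return 255.5.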