diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java index d3e9992..d0003ed 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java @@ -74,7 +74,6 @@ import org.apache.hadoop.hive.ql.plan.ptf.ShapeDetails; import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef; import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef; -import org.apache.hadoop.hive.ql.udf.ptf.Noop; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -265,16 +264,19 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, //Since we cannot know what columns will be needed by a PTF chain, //we do not prune columns on PTFOperator for PTF chains. PartitionedTableFunctionDef funcDef = conf.getFuncDef(); - if (!conf.forWindowing() && !Noop.class.isInstance(funcDef.getTFunction())) { + List referencedColumns = funcDef.getReferencedColumns(); + if (!conf.forWindowing() && !conf.forNoop() && referencedColumns == null) { return super.process(nd, stack, cppCtx, nodeOutputs); } - - //we create a copy of prunedCols to create a list of pruned columns for PTFOperator - List prunedCols = - new ArrayList(cppCtx.getPrunedColList(op.getChildOperators().get(0))); - if (funcDef instanceof WindowTableFunctionDef) { + + List prunedCols = cppCtx.getPrunedColList(op.getChildOperators().get(0)); + if (conf.forWindowing()) { WindowTableFunctionDef def = (WindowTableFunctionDef) funcDef; prunedCols = Utilities.mergeUniqElems(getWindowFunctionColumns(def), prunedCols); + } else if (conf.forNoop()) { + prunedCols = new ArrayList(cppCtx.getPrunedColList(op.getChildOperators().get(0))); + } else { + prunedCols = referencedColumns; } List newRS = prunedColumnsList(prunedCols, op.getSchema(), funcDef); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java index 483f55b..00b43c6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java @@ -342,6 +342,7 @@ private PartitionedTableFunctionDef translate(PartitionedTableFunctionSpec spec, outColNames, outRR); def.setOutputShape(outputShape); + def.setReferencedColumns(tFn.getReferencedColumns()); return def; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java index 2f31eed..5e63f2f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.plan.ptf.PTFInputDef; import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef; import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef; +import org.apache.hadoop.hive.ql.udf.ptf.Noop; import java.util.ArrayList; import java.util.Collections; @@ -97,6 +98,10 @@ public boolean forWindowing() { return funcDef instanceof WindowTableFunctionDef; } + public boolean forNoop() { + return funcDef.getTFunction() instanceof Noop; + } + @Explain(displayName = "Map-side function", displayOnlyOnTrue = true) public boolean isMapSide() { return isMapSide; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java index 967caaa..d1ad20a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java @@ -37,6 +37,8 @@ private OrderDef order; private TableFunctionEvaluator tFunction; boolean transformsRawInput; + + private transient List referencedColumns; @Explain(displayName = "name") public String getName() { @@ -185,4 +187,13 @@ public String getResolverClassName() { public void setResolverClassName(String resolverClassName) { this.resolverClassName = resolverClassName; } + + @Explain(displayName = "referenced columns") + public List getReferencedColumns() { + return referencedColumns; + } + + public void setReferencedColumns(List referencedColumns) { + this.referencedColumns = referencedColumns; + } } \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java index aa48a6c..03f434f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; import org.apache.hadoop.hive.ql.exec.PTFPartition; import org.apache.hadoop.hive.ql.exec.PTFPartition.PTFPartitionIterator; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.PTFTranslator; @@ -195,6 +196,20 @@ public void setupOutputOI() throws SemanticException setOutputOI(OI); } + + @Override + public List getReferencedColumns() throws SemanticException { + MatchPath matchPath = (MatchPath) evaluator; + List columns = new ArrayList<>(); + for (ExprNodeDesc exprNode : matchPath.resultExprInfo.resultExprNodes) { + Utilities.mergeUniqElems(columns, exprNode.getCols()); + } + for (ExprNodeDesc exprNode : matchPath.symInfo.symbolExprsDecs) { + Utilities.mergeUniqElems(columns, exprNode.getCols()); + } + return columns; + } + /* * validate and setup patternStr */ @@ -356,6 +371,7 @@ public void setResultExprInfo(ResultExprInfo resultExprInfo) { static class SymbolsInfo { int sz; + ArrayList symbolExprsDecs; ArrayList symbolExprsEvaluators; ArrayList symbolExprsOIs; ArrayList symbolExprsNames; @@ -366,6 +382,7 @@ public void setResultExprInfo(ResultExprInfo resultExprInfo) { symbolExprsEvaluators = new ArrayList(sz); symbolExprsOIs = new ArrayList(sz); symbolExprsNames = new ArrayList(sz); + symbolExprsDecs = new ArrayList<>(sz); } void add(String name, PTFExpressionDef arg) @@ -373,6 +390,7 @@ void add(String name, PTFExpressionDef arg) symbolExprsNames.add(name); symbolExprsEvaluators.add(arg.getExprEvaluator()); symbolExprsOIs.add(arg.getOI()); + symbolExprsDecs.add(arg.getExprNode()); } } @@ -749,8 +767,7 @@ private void setupSelectListInputInfo() throws SemanticException /* * create SelectListOI */ - selectListInputOI = (StructObjectInspector) - PTFTranslator.getStandardStructOI(selectListInputRowResolver); + selectListInputOI = PTFTranslator.getStandardStructOI(selectListInputRowResolver); } private void fixResultExprString() diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NoopWithMap.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NoopWithMap.java index 0b090a9..468b5b7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NoopWithMap.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NoopWithMap.java @@ -29,11 +29,6 @@ public class NoopWithMap extends Noop { - @Override - public PTFPartition execute(PTFPartition iPart) throws HiveException - { - return iPart; - } @Override protected PTFPartition _transformRawInput(PTFPartition iPart) throws HiveException diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionEvaluator.java index b8b819e..a0f5a7a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionEvaluator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionEvaluator.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.plan.PTFDesc; import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; /* @@ -60,7 +59,7 @@ */ /** - * Based on Hive {@link GenericUDAFEvaluator}. Break up the responsibility of the old AsbtractTableFunction + * Based on Hive {@link GenericUDAFEvaluator}. Break up the responsibility of the old AbstractTableFunction * class into a Resolver and Evaluator. *

* The Evaluator also holds onto the {@link TableFunctionDef}. This provides information @@ -79,7 +78,7 @@ */ public abstract class TableFunctionEvaluator { /* - * how is this different from the OutpuShape set on the TableDef. + * how is this different from the OutputShape set on the TableDef. * This is the OI of the object coming out of the PTF. * It is put in an output Partition whose Serde is usually LazyBinarySerde. * So the next PTF (or Operator) in the chain gets a LazyBinaryStruct. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionResolver.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionResolver.java index 969013c..71034d7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionResolver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionResolver.java @@ -60,7 +60,7 @@ /* * - called during translation. * - invokes createEvaluator which must be implemented by a subclass - * - sets up the evaluator with references to the TableDef, PartitionClass, PartitonMemsize and + * - sets up the evaluator with references to the TableDef, PartitionClass, PartitionMemsize and * the transformsRawInput boolean. */ public void initialize(HiveConf cfg, PTFDesc ptfDesc, PartitionedTableFunctionDef tDef) @@ -193,4 +193,14 @@ public boolean carryForwardNames() { * a subclass must provide the {@link TableFunctionEvaluator} instance. */ protected abstract TableFunctionEvaluator createEvaluator(PTFDesc ptfDesc, PartitionedTableFunctionDef tDef); + + /** + * Provide referenced columns names to be used in partition function + * + * @return null for unknown (will get all columns from table including virtual columns) + * @throws SemanticException + */ + public List getReferencedColumns() throws SemanticException { + return null; + } } diff --git a/ql/src/test/results/clientpositive/ptf_matchpath.q.out b/ql/src/test/results/clientpositive/ptf_matchpath.q.out index aaa66cf..dd4b65f 100644 --- a/ql/src/test/results/clientpositive/ptf_matchpath.q.out +++ b/ql/src/test/results/clientpositive/ptf_matchpath.q.out @@ -72,40 +72,41 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int) sort order: ++++ Map-reduce partition columns: fl_num (type: string) - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE - value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + value expressions: origin_city_name (type: string), arr_delay (type: float) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct + output shape: type: TABLE Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: _col6 raw input shape: - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -191,43 +192,44 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 0 (type: int), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int) sort order: +++++ Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE - value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + value expressions: origin_city_name (type: string), arr_delay (type: float) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct + output shape: type: TABLE Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col6, _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: 0 raw input shape: - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (fl_num = 1142) (type: boolean) - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), '1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -300,47 +302,48 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 44 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (fl_num = -1142) (type: boolean) - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: origin_city_name (type: string), dest_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + expressions: origin_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float) + outputColumnNames: _col0, _col2, _col3, _col4, _col5 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 0 (type: int), '-1142' (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int) sort order: +++++ Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: float) + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: float) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string + output shape: type: SUBQUERY Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col6, _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: 0 raw input shape: - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), '-1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/ptf_matchpath.q.out b/ql/src/test/results/clientpositive/spark/ptf_matchpath.q.out index 22c8af3..f9a9734 100644 --- a/ql/src/test/results/clientpositive/spark/ptf_matchpath.q.out +++ b/ql/src/test/results/clientpositive/spark/ptf_matchpath.q.out @@ -77,41 +77,42 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int) sort order: ++++ Map-reduce partition columns: fl_num (type: string) - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE - value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + value expressions: origin_city_name (type: string), arr_delay (type: float) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct + output shape: type: TABLE Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: _col6 raw input shape: - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -202,44 +203,45 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 0 (type: int), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int) sort order: +++++ Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE - value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + value expressions: origin_city_name (type: string), arr_delay (type: float) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct + output shape: type: TABLE Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col6, _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: 0 raw input shape: - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (fl_num = 1142) (type: boolean) - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), '1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -317,48 +319,49 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 44 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (fl_num = -1142) (type: boolean) - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: origin_city_name (type: string), dest_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + expressions: origin_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float) + outputColumnNames: _col0, _col2, _col3, _col4, _col5 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 0 (type: int), '-1142' (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int) sort order: +++++ Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: float) + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: float) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string + output shape: type: SUBQUERY Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col6, _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: 0 raw input shape: - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), '-1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/tez/ptf_matchpath.q.out b/ql/src/test/results/clientpositive/tez/ptf_matchpath.q.out index fc2b034..3a2071d 100644 --- a/ql/src/test/results/clientpositive/tez/ptf_matchpath.q.out +++ b/ql/src/test/results/clientpositive/tez/ptf_matchpath.q.out @@ -77,41 +77,42 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int) sort order: ++++ Map-reduce partition columns: fl_num (type: string) - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE - value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + value expressions: origin_city_name (type: string), arr_delay (type: float) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct + output shape: type: TABLE Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: _col6 raw input shape: - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -202,44 +203,45 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 0 (type: int), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int) sort order: +++++ Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE - value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + value expressions: origin_city_name (type: string), arr_delay (type: float) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct + output shape: type: TABLE Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col6, _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: 0 raw input shape: - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (fl_num = 1142) (type: boolean) - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), '1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -317,48 +319,49 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 44 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (fl_num = -1142) (type: boolean) - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: origin_city_name (type: string), dest_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + expressions: origin_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float) + outputColumnNames: _col0, _col2, _col3, _col4, _col5 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 0 (type: int), '-1142' (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int) sort order: +++++ Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: float) + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: float) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string + output shape: type: SUBQUERY Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col6, _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: 0 raw input shape: - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), '-1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat