diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 4bcf6bf..30d258f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -30,7 +30,9 @@
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.LimitOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
@@ -64,6 +66,7 @@
 import org.apache.hadoop.hive.ql.stats.StatsUtils;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS;
@@ -76,8 +79,12 @@
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -89,6 +96,14 @@
   private static final Logger LOG = LoggerFactory.getLogger(StatsRulesProcFactory.class.getName());
   private static final boolean isDebugEnabled = LOG.isDebugEnabled();
 
+  private static final String AND_UDF =
+      GenericUDFOPAnd.class.getAnnotation(Description.class).name();
+  private static final String OR_UDF =
+      GenericUDFOPOr.class.getAnnotation(Description.class).name();
+  private static final String EQUAL_UDF =
+      GenericUDFOPEqual.class.getAnnotation(Description.class).name();
+
+
   /**
    * Collect basic statistics like number of rows, data size and column level statistics from the
    * table. Also sets the state of the available statistics. Basic and column statistics can have
@@ -300,7 +315,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
 
     private long evaluateExpression(Statistics stats, ExprNodeDesc pred,
         AnnotateStatsProcCtx aspCtx, List<String> neededCols,
-        FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException {
+        FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException {
       long newNumRows = 0;
       Statistics andStats = null;
 
@@ -339,6 +354,15 @@
             evaluatedRowCount = newNumRows;
           }
         }
+      } else if (udf instanceof GenericUDFIn) {
+        // for an IN clause, transform to OR/AND form to compute and update stats
+        ExprNodeDesc unfoldPred = unfoldInClause(pred);
+        if (unfoldPred != null) {
+          newNumRows = evaluateExpression(stats, unfoldPred, aspCtx, neededCols, fop, evaluatedRowCount);
+        } else {
+          // fall back to the default case
+          newNumRows = stats.getNumRows() / 2;
+        }
       } else if (udf instanceof GenericUDFOPNot) {
         newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, fop);
       } else if (udf instanceof GenericUDFOPNotNull) {
@@ -376,9 +400,89 @@ private long evaluateExpression(Statistics stats, ExprNodeDesc pred,
 
       return newNumRows;
     }
 
+    private ExprNodeDesc unfoldInClause(ExprNodeDesc pred) throws SemanticException {
+      ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred;
+
+      // 1. It is an IN operator; check whether it uses a STRUCT
+      List<ExprNodeDesc> children = fd.getChildren();
+      List<ExprNodeDesc> columns = Lists.newArrayList();
+      ExprNodeDesc columnsChild = children.get(0);
+      boolean multiColumn;
+      if (columnsChild instanceof ExprNodeGenericFuncDesc &&
+          ((ExprNodeGenericFuncDesc) columnsChild).getGenericUDF() instanceof GenericUDFStruct) {
+        for (int j = 0; j < columnsChild.getChildren().size(); j++) {
+          ExprNodeDesc columnChild = columnsChild.getChildren().get(j);
+          // If the column is not a column reference, we bail out
+          if (!(columnChild instanceof ExprNodeColumnDesc)) {
+            return null;
+          }
+          columns.add(columnChild);
+        }
+        multiColumn = true;
+      } else {
+        // If the column is not a column reference, we bail out
+        if (!(columnsChild instanceof ExprNodeColumnDesc)) {
+          return null;
+        }
+        columns.add(columnsChild);
+        multiColumn = false;
+      }
+
+      // 2. Extract columns and values
+      List<ExprNodeDesc> newChildren = Lists.newArrayList();
+      for (int i = 1; i < children.size(); i++) {
+        List<ExprNodeDesc> values = Lists.newArrayList();
+        ExprNodeDesc child = children.get(i);
+        // If the value is not a constant, we bail out
+        if (!(child instanceof ExprNodeConstantDesc)) {
+          return null;
+        }
+        if (multiColumn) {
+          ExprNodeConstantDesc constantChild = (ExprNodeConstantDesc) child;
+          List<?> items = (List<?>) constantChild.getWritableObjectInspector().getWritableConstantValue();
+          List<TypeInfo> structTypes = ((StructTypeInfo) constantChild.getTypeInfo()).getAllStructFieldTypeInfos();
+          for (int j = 0; j < structTypes.size(); j++) {
+            values.add(new ExprNodeConstantDesc(structTypes.get(j), items.get(j)));
+          }
+        } else {
+          values.add(child);
+        }
+        List<ExprNodeDesc> comparisons = Lists.newArrayList();
+        for (int j = 0; j < columns.size(); j++) {
+          ExprNodeDesc column = columns.get(j);
+          ExprNodeDesc value = values.get(j);
+          ExprNodeDesc comparison = new ExprNodeGenericFuncDesc(
+              TypeInfoFactory.booleanTypeInfo,
+              FunctionRegistry.getFunctionInfo(EQUAL_UDF).getGenericUDF(),
+              Lists.newArrayList(column, value));
+          comparisons.add(comparison);
+        }
+        if (comparisons.size() == 1) {
+          newChildren.add(comparisons.get(0));
+        } else {
+          newChildren.add(new ExprNodeGenericFuncDesc(
+              TypeInfoFactory.booleanTypeInfo,
+              FunctionRegistry.getFunctionInfo(AND_UDF).getGenericUDF(),
+              comparisons));
+        }
+      }
+
+      // 3. Create an OR tree if we have more than one expression
+      ExprNodeDesc output;
+      if (newChildren.size() == 1) {
+        output = newChildren.get(0);
+      } else {
+        output = new ExprNodeGenericFuncDesc(
+            TypeInfoFactory.booleanTypeInfo,
+            FunctionRegistry.getFunctionInfo(OR_UDF).getGenericUDF(),
+            newChildren);
+      }
+      return output;
+    }
+
     private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred,
         AnnotateStatsProcCtx aspCtx, List<String> neededCols, FilterOperator fop)
-        throws CloneNotSupportedException {
+        throws CloneNotSupportedException, SemanticException {
 
       long numRows = stats.getNumRows();
 
@@ -677,7 +781,7 @@ private long evaluateComparator(Statistics stats, ExprNodeGenericFuncDesc genFunc
 
     private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
         AnnotateStatsProcCtx aspCtx, List<String> neededCols,
-        FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException {
+        FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException {
 
       long numRows = stats.getNumRows();
 
@@ -762,7 +866,7 @@
       } else if (udf instanceof GenericUDFOPNull) {
         return evaluateColEqualsNullExpr(stats, genFunc);
       } else if (udf instanceof GenericUDFOPAnd || udf instanceof GenericUDFOPOr
-          || udf instanceof GenericUDFOPNot) {
+          || udf instanceof GenericUDFIn || udf instanceof GenericUDFOPNot) {
         return evaluateExpression(stats, genFunc, aspCtx, neededCols, fop, evaluatedRowCount);
       }
     }
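
Note on the new code path above: AND_UDF, OR_UDF and EQUAL_UDF are read from each operator's @Description annotation (which carries the registry names "and", "or" and "="), so the FunctionRegistry lookups rebuild the unfolded predicate from the same UDF instances the planner uses everywhere else. unfoldInClause() then turns a (possibly multi-column) IN predicate into the OR-of-ANDs shape that the existing AND/OR estimation logic already knows how to price. A minimal, self-contained sketch of that transformation on plain strings rather than ExprNodeDesc trees (class and method names here are illustrative only, not Hive APIs):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class UnfoldInSketch {

  // (c1, ..., cn) IN (row1, ..., rowm)  =>  OR over rows of AND over columns.
  static String unfold(List<String> columns, List<List<String>> rows) {
    List<String> disjuncts = new ArrayList<>();
    for (List<String> row : rows) {
      List<String> conjuncts = new ArrayList<>();
      for (int j = 0; j < columns.size(); j++) {
        conjuncts.add("(" + columns.get(j) + " = " + row.get(j) + ")");
      }
      disjuncts.add(conjuncts.size() == 1
          ? conjuncts.get(0)
          : "(" + String.join(" and ", conjuncts) + ")");
    }
    return disjuncts.size() == 1 ? disjuncts.get(0) : String.join(" or ", disjuncts);
  }

  public static void main(String[] args) {
    // Mirrors struct(key,ds) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09'))
    System.out.println(unfold(
        Arrays.asList("key", "ds"),
        Arrays.asList(
            Arrays.asList("1", "'2000-04-08'"),
            Arrays.asList("2", "'2000-04-09'"))));
    // prints: ((key = 1) and (ds = '2000-04-08')) or ((key = 2) and (ds = '2000-04-09'))
  }
}

The real method additionally bails out (returns null, which triggers the numRows / 2 default) whenever the left side is not a plain column reference or any right-side entry is not a constant, since basic stats say nothing useful about such predicates.
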
diff --git ql/src/test/results/clientpositive/pointlookup.q.out ql/src/test/results/clientpositive/pointlookup.q.out
index 460cc74..3bd2d20 100644
--- ql/src/test/results/clientpositive/pointlookup.q.out
+++ ql/src/test/results/clientpositive/pointlookup.q.out
@@ -111,14 +111,14 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -177,14 +177,14 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/pointlookup2.q.out ql/src/test/results/clientpositive/pointlookup2.q.out
index fb17e72..608cf34 100644
--- ql/src/test/results/clientpositive/pointlookup2.q.out
+++ ql/src/test/results/clientpositive/pointlookup2.q.out
@@ -169,16 +169,16 @@ STAGE PLANS:
           Filter Operator
             isSamplingPred: false
             predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
-            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: int), value (type: string), ds (type: string)
               outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
                 null sort order: aaa
                 sort order: +++
-                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                 tag: -1
                 auto parallelism: false
       Path -> Alias:
@@ -282,13 +282,13 @@ STAGE PLANS:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             GlobalTableId: 0
 #### A masked pattern was here ####
             NumFilesPerFileSink: 1
-            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -988,15 +988,15 @@ STAGE PLANS:
           Filter Operator
             isSamplingPred: false
             predicate: (ds) IN ('2000-04-08', '2000-04-09') (type: boolean)
-            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: int), value (type: string), ds (type: string)
               outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 null sort order: 
                 sort order: 
-                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                 tag: 0
                 value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
                 auto parallelism: false
@@ -1169,11 +1169,11 @@
             0 
             1 
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-          Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 44 Data size: 352 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
             isSamplingPred: false
             predicate: (struct(_col4,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
-            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
               GlobalTableId: 0
@@ -1201,7 +1201,7 @@
               key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string)
               null sort order: aaa
               sort order: +++
-              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
               tag: -1
               value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
               auto parallelism: false
@@ -1235,13 +1235,13 @@
         Select Operator
           expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             GlobalTableId: 0
 #### A masked pattern was here ####
             NumFilesPerFileSink: 1
-            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1363,15 +1363,15 @@ STAGE PLANS:
           Filter Operator
             isSamplingPred: false
             predicate: (key) IN (1, 2) (type: boolean)
-            Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: int), value (type: string), ds (type: string)
               outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 null sort order: 
                 sort order: 
-                Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
                 tag: 0
                 value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
                 auto parallelism: false
@@ -1590,11 +1590,11 @@
             0 
             1 
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-          Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 66 Data size: 528 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
             isSamplingPred: false
             predicate: (struct(_col0,_col3)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
-            Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 32 Data size: 256 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
               GlobalTableId: 0
@@ -1622,7 +1622,7 @@
               key expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
               null sort order: aaa
               sort order: +++
-              Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 32 Data size: 256 Basic stats: COMPLETE Column stats: NONE
               tag: -1
               value expressions: _col2 (type: string), _col4 (type: int), _col5 (type: string)
               auto parallelism: false
@@ -1656,13 +1656,13 @@
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-          Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 32 Data size: 256 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             GlobalTableId: 0
 #### A masked pattern was here ####
             NumFilesPerFileSink: 1
-            Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 32 Data size: 256 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/pointlookup3.q.out ql/src/test/results/clientpositive/pointlookup3.q.out
index d5c4157..3f3bfe6 100644
--- ql/src/test/results/clientpositive/pointlookup3.q.out
+++ ql/src/test/results/clientpositive/pointlookup3.q.out
@@ -130,16 +130,16 @@ STAGE PLANS:
           Filter Operator
             isSamplingPred: false
             predicate: (struct(ds1,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean)
-            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string)
               outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string)
                 null sort order: aaaa
                 sort order: ++++
-                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                 tag: -1
                 auto parallelism: false
       Path -> Alias:
@@ -245,13 +245,13 @@
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             GlobalTableId: 0
 #### A masked pattern was here ####
             NumFilesPerFileSink: 1
-            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1152,15 +1152,15 @@ STAGE PLANS:
           Filter Operator
             isSamplingPred: false
             predicate: (ds1) IN ('2000-04-08', '2000-04-09') (type: boolean)
-            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string)
               outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 null sort order: 
                 sort order: 
-                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                 tag: 0
                 value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string)
                 auto parallelism: false
@@ -1171,15 +1171,15 @@ STAGE PLANS:
           Filter Operator
             isSamplingPred: false
             predicate: (key) IN (1, 2) (type: boolean)
-            Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string)
               outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                 null sort order: 
                 sort order: 
-                Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
                 tag: 1
                 value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string)
                 auto parallelism: false
@@ -1337,11 +1337,11 @@
             0 
             1 
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-          Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 66 Data size: 528 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
             isSamplingPred: false
             predicate: (struct(_col4,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
-            Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 32 Data size: 256 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
               GlobalTableId: 0
@@ -1369,7 +1369,7 @@
               key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string)
               null sort order: aaa
               sort order: +++
-              Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 32 Data size: 256 Basic stats: COMPLETE Column stats: NONE
               tag: -1
               value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: string)
               auto parallelism: false
@@ -1403,13 +1403,13 @@
         Select Operator
           expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-          Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 32 Data size: 256 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             GlobalTableId: 0
 #### A masked pattern was here ####
             NumFilesPerFileSink: 1
-            Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 32 Data size: 256 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/pointlookup4.q.out ql/src/test/results/clientpositive/pointlookup4.q.out
index 0a9bd3e..2d50fee 100644
--- ql/src/test/results/clientpositive/pointlookup4.q.out
+++ ql/src/test/results/clientpositive/pointlookup4.q.out
@@ -385,16 +385,16 @@ STAGE PLANS:
           Filter Operator
             isSamplingPred: false
             predicate: (struct(ds1,key,ds2)) IN (const struct('2000-04-08',1,'2001-04-08'), const struct('2000-04-09',2,'2001-04-09')) (type: boolean)
-            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string)
               outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string)
                 null sort order: aaaa
                 sort order: ++++
-                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                 tag: -1
                 auto parallelism: false
       Path -> Alias:
@@ -500,13 +500,13 @@
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             GlobalTableId: 0
 #### A masked pattern was here ####
             NumFilesPerFileSink: 1
-            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
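
Note on the q.out deltas: the old code priced any IN predicate at a flat numRows / 2, while the unfolded form is priced by the existing AND/OR rules. With Column stats: NONE those rules appear to be: an equality halves the row count (integer division), the conjuncts built from one IN value halve in sequence, an OR sums its disjuncts, and the result is capped at the input row count. Equality on a partition column (ds, ds1, ds2) does not reduce rows, which is why the struct filters over partition columns at the table scans land on the full input count. A small sketch of that arithmetic, as I read the estimator (an assumption-laden illustration, not Hive code):

public class InEstimateSketch {

  static long estimate(long inputRows, int columnsPerValue, int valueCount) {
    long perDisjunct = inputRows;
    for (int i = 0; i < columnsPerValue; i++) {
      perDisjunct /= 2;                        // col = const with no stats: halve
    }
    long orTotal = perDisjunct * valueCount;   // OR adds its children's estimates
    return Math.min(inputRows, orTotal);       // capped at the input cardinality
  }

  public static void main(String[] args) {
    System.out.println(estimate(500, 2, 11));  // 500 - pointlookup.q.out, was 250
    System.out.println(estimate(44, 2, 2));    // 22  - pointlookup2.q.out join filter, was 11
    System.out.println(estimate(66, 2, 2));    // 32  - pointlookup3.q.out join filter, was 16
    System.out.println(estimate(60, 1, 2));    // 60  - (key) IN (1, 2), was 30
  }
}

The filter estimates therefore double relative to the old flat halving, and the join outputs downstream of them double in turn. That is the more conservative direction, which is generally safer for downstream join planning when no column statistics are available.
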