diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 9cc7987..ac6b901 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1186,6 +1186,8 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { "Whether to transitively replicate predicate filters over equijoin conditions."), HIVEPPDREMOVEDUPLICATEFILTERS("hive.ppd.remove.duplicatefilters", true, "Whether to push predicates down into storage handlers. Ignored when hive.optimize.ppd is false."), + HIVEPOINTLOOKUPOPTIMIZER("hive.optimize.point.lookup", true, + "Whether to transform OR clauses in Filter operators into IN clauses"), // Constant propagation optimizer HIVEOPTCONSTANTPROPAGATION("hive.optimize.constant.propagation", true, "Whether to enable constant propagation optimizer"), HIVEIDENTITYPROJECTREMOVER("hive.optimize.remove.identity.project", true, "Removes identity project from operator tree"), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index c4e11b9..14f362f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -81,6 +81,12 @@ public void initialize(HiveConf hiveConf) { // are combined and may become eligible for reduction (like is not null filter). transformations.add(new ConstantPropagate()); } + + // Try to transform OR predicates in Filter into IN clauses. + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) { + transformations.add(new PointLookupOptimizer()); + } + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) { transformations.add(new PartitionPruner()); transformations.add(new PartitionConditionRemover()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java index 71a6c73..825938a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java @@ -50,6 +50,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /** @@ -379,7 +380,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } } - if (has_part_col) { + if (has_part_col && fd.getTypeInfo().getCategory() == Category.PRIMITIVE) { // we need to evaluate result for every pruned partition if (fd.getTypeInfo().equals(TypeInfoFactory.booleanTypeInfo)) { // if the return type of the GenericUDF is boolean and all partitions agree on diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java index fd51628..7262164 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java @@ -55,7 +55,8 @@ protected void generatePredicate(NodeProcessorCtx procCtx, FilterOperator fop, TableScanOperator top) throws SemanticException, UDFArgumentException { OpWalkerCtx owc = (OpWalkerCtx) procCtx; // Otherwise this is not a sampling predicate and we need to - ExprNodeDesc predicate = fop.getConf().getPredicate(); + ExprNodeDesc predicate = fop.getConf().getOrigPredicate(); + predicate = predicate == null ? fop.getConf().getPredicate() : predicate; String alias = top.getConf().getAlias(); // Generate the partition pruning predicate diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java index 5408dc8..6a31689 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java @@ -70,14 +70,16 @@ public Object clone() { SampleDesc desc = new SampleDesc(numerator, denominator, null, inputPruning); return desc; } - + + @Override public String toString() { - return inputPruning ? "BUCKET " + numerator + " OUT OF " + denominator: null; + return inputPruning ? "BUCKET " + numerator + " OUT OF " + denominator: null; } } private static final long serialVersionUID = 1L; private org.apache.hadoop.hive.ql.plan.ExprNodeDesc predicate; + private transient ExprNodeDesc origPredicate; private boolean isSamplingPred; private transient SampleDesc sampleDescr; //Is this a filter that should perform a comparison for sorted searches @@ -149,6 +151,14 @@ public void setSortedFilter(boolean isSortedFilter) { this.isSortedFilter = isSortedFilter; } + public void setOrigPredicate(ExprNodeDesc origPredicate) { + this.origPredicate = origPredicate; + } + + public ExprNodeDesc getOrigPredicate() { + return origPredicate; + } + /** * Some filters are generated or implied, which means it is not in the query. * It is added by the analyzer. For example, when we do an inner join, we add diff --git a/ql/src/test/queries/clientpositive/annotate_stats_deep_filters.q b/ql/src/test/queries/clientpositive/annotate_stats_deep_filters.q index c027532..e01a654 100644 --- a/ql/src/test/queries/clientpositive/annotate_stats_deep_filters.q +++ b/ql/src/test/queries/clientpositive/annotate_stats_deep_filters.q @@ -20,6 +20,7 @@ analyze table over1k compute statistics; analyze table over1k compute statistics for columns; set hive.stats.fetch.column.stats=true; +set hive.optimize.point.lookup=false; explain select count(*) from over1k where ( (t=1 and si=2) or (t=2 and si=3) @@ -63,4 +64,4 @@ or (t=17 and si=18) or (t=27 and si=28) or (t=37 and si=38) or (t=47 and si=48) -or (t=52 and si=53)); \ No newline at end of file +or (t=52 and si=53)); diff --git a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out index 9fc3c8d..dad573b 100644 --- a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out +++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out @@ -1134,11 +1134,15 @@ STAGE PLANS: alias: alterdynamic_part_table Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Select Operator - expressions: intcol (type: string) - outputColumnNames: _col0 + Filter Operator + isSamplingPred: false + predicate: (struct(partcol1,partcol2)) IN (struct(2,'1'), struct(1,'__HIVE_DEFAULT_PARTITION__')) (type: boolean) Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE - ListSink + Select Operator + expressions: intcol (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: select intcol from pt.alterdynamic_part_table where (partcol1='2' and partcol2='1')or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__') PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out index 492e302..af1e1c3 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -678,15 +678,15 @@ STAGE PLANS: alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((state = 'OH') or (state = 'CA')) (type: boolean) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (state) IN ('OH', 'CA') (type: boolean) + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/flatten_and_or.q.out b/ql/src/test/results/clientpositive/flatten_and_or.q.out index 9c51ff3..d63f5ed 100644 --- a/ql/src/test/results/clientpositive/flatten_and_or.q.out +++ b/ql/src/test/results/clientpositive/flatten_and_or.q.out @@ -44,15 +44,15 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (struct(key,value)) IN (struct('0','8'), struct('1','5'), struct('2','6'), struct('3','8'), struct('4','1'), struct('5','6'), struct('6','1'), struct('7','1'), struct('8','1'), struct('9','1'), struct('10','3')) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/pcr.q.out b/ql/src/test/results/clientpositive/pcr.q.out index d7c40a3..944289b 100644 --- a/ql/src/test/results/clientpositive/pcr.q.out +++ b/ql/src/test/results/clientpositive/pcr.q.out @@ -2475,16 +2475,16 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean) - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + predicate: (struct(key,ds)) IN (struct(1,'2000-04-08'), struct(2,'2000-04-09')) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) sort order: +++ - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false Path -> Alias: @@ -2588,13 +2588,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git a/ql/src/test/results/clientpositive/ppd_transform.q.out b/ql/src/test/results/clientpositive/ppd_transform.q.out index 17248e4..f536767 100644 --- a/ql/src/test/results/clientpositive/ppd_transform.q.out +++ b/ql/src/test/results/clientpositive/ppd_transform.q.out @@ -390,21 +390,21 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0) IN ('a', 'b') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0) IN ('c', 'd') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out index fb08f10..a21011b 100644 --- a/ql/src/test/results/clientpositive/spark/pcr.q.out +++ b/ql/src/test/results/clientpositive/spark/pcr.q.out @@ -2534,16 +2534,16 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean) - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + predicate: (struct(key,ds)) IN (struct(1,'2000-04-08'), struct(2,'2000-04-09')) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) sort order: +++ - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false Path -> Alias: @@ -2648,13 +2648,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out index 52a847a..a6e6e38 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out @@ -405,21 +405,21 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0) IN ('a', 'b') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0) IN ('c', 'd') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out index c2250e6..54003c3 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -45,7 +45,7 @@ STAGE PLANS: TableScan alias: alltypesorc Filter Operator - predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) + predicate: (csmallint) IN (418, 12205, 10583) (type: boolean) Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 diff --git a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out index c2250e6..54003c3 100644 --- a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out @@ -45,7 +45,7 @@ STAGE PLANS: TableScan alias: alltypesorc Filter Operator - predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) + predicate: (csmallint) IN (418, 12205, 10583) (type: boolean) Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 diff --git a/ql/src/test/results/clientpositive/vectorized_case.q.out b/ql/src/test/results/clientpositive/vectorized_case.q.out index 73bf12d..9e47014 100644 --- a/ql/src/test/results/clientpositive/vectorized_case.q.out +++ b/ql/src/test/results/clientpositive/vectorized_case.q.out @@ -46,20 +46,19 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + predicate: (csmallint) IN (418, 12205, 10583) (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized Stage: Stage-0 Fetch Operator