diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index e0e1339..756735b 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -975,7 +975,7 @@ HIVE_USER_INSTALL_DIR("hive.user.install.directory", "hdfs:///user/"), // Vectorization enabled - HIVE_VECTORIZATION_ENABLED("hive.vectorized.execution.enabled", false), + HIVE_VECTORIZATION_ENABLED("hive.vectorized.execution.enabled", true), HIVE_VECTORIZATION_GROUPBY_CHECKINTERVAL("hive.vectorized.groupby.checkinterval", 100000), HIVE_VECTORIZATION_GROUPBY_MAXENTRIES("hive.vectorized.groupby.maxentries", 1000000), HIVE_VECTORIZATION_GROUPBY_FLUSH_PERCENT("hive.vectorized.groupby.flush.percent", (float) 0.1), diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 6af6b2d..d842711 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -93,8 +93,8 @@ protected transient ListBucketingCtx lbCtx; protected transient boolean isSkewedStoredAsSubDirectories; protected transient boolean statsCollectRawDataSize; - private transient boolean[] statsFromRecordWriter; - private transient boolean isCollectRWStats; + protected transient boolean[] statsFromRecordWriter; + protected transient boolean isCollectRWStats; private transient FSPaths prevFsp; private transient FSPaths fpaths; private transient ObjectInspector keyOI; @@ -627,7 +627,7 @@ public void processOp(Object row, int tag) throws HiveException { } } - private boolean areAllTrue(boolean[] statsFromRW) { + protected boolean areAllTrue(boolean[] statsFromRW) { for(boolean b : statsFromRW) { if (!b) { return false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java index c6a7c00..f3e0bc6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java @@ -145,7 +145,12 @@ public void processOp(Object data, int tag) throws HiveException { } rowOutWriters = fpaths.getOutWriters(); - if (conf.isGatherStats()) { + + // check if all record writers implement statistics. if at least one RW + // doesn't implement stats interface we will fall back to the conventional way + // of gathering stats + isCollectRWStats = areAllTrue(statsFromRecordWriter); + if (conf.isGatherStats() && !isCollectRWStats) { if (statsCollectRawDataSize) { SerDeStats stats = serializer.getSerDeStats(); if (stats != null) { diff --git ql/src/test/results/clientpositive/annotate_stats_filter.q.out ql/src/test/results/clientpositive/annotate_stats_filter.q.out index 6d33314..f8b89f2 100644 Binary files ql/src/test/results/clientpositive/annotate_stats_filter.q.out and ql/src/test/results/clientpositive/annotate_stats_filter.q.out differ diff --git ql/src/test/results/clientpositive/annotate_stats_groupby.q.out ql/src/test/results/clientpositive/annotate_stats_groupby.q.out index a909e81..ee9fc6b 100644 --- ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -267,6 +267,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [sq1:loc_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -502,6 +503,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -654,6 +656,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Group By
Operator @@ -1729,6 +1732,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/annotate_stats_join.q.out ql/src/test/results/clientpositive/annotate_stats_join.q.out index e2868ad..07565f0 100644 --- ql/src/test/results/clientpositive/annotate_stats_join.q.out +++ ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -397,6 +397,7 @@ STAGE PLANS: Truncated Path -> Alias: /dept_orc [d] /emp_orc [e] + Execution mode: vectorized Needs Tagging: true Reduce Operator Tree: Join Operator @@ -639,6 +640,7 @@ STAGE PLANS: Truncated Path -> Alias: /dept_orc [d] /emp_orc [e1, e] + Execution mode: vectorized Needs Tagging: true Reduce Operator Tree: Join Operator @@ -928,6 +930,7 @@ STAGE PLANS: /dept_orc [d] /emp_orc [e] /loc_orc [l] + Execution mode: vectorized Needs Tagging: true Reduce Operator Tree: Join Operator @@ -1219,6 +1222,7 @@ STAGE PLANS: /dept_orc [d] /emp_orc [e] /loc_orc [l] + Execution mode: vectorized Needs Tagging: true Reduce Operator Tree: Join Operator @@ -1448,6 +1452,7 @@ STAGE PLANS: Truncated Path -> Alias: /dept_orc [d] /emp_orc [e] + Execution mode: vectorized Needs Tagging: true Reduce Operator Tree: Join Operator @@ -1757,6 +1762,7 @@ STAGE PLANS: /dept_orc [d] /emp_orc [e] /loc_orc [l] + Execution mode: vectorized Needs Tagging: true Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/annotate_stats_part.q.out ql/src/test/results/clientpositive/annotate_stats_part.q.out index 8a86943..f2128a1 100644 Binary files ql/src/test/results/clientpositive/annotate_stats_part.q.out and ql/src/test/results/clientpositive/annotate_stats_part.q.out differ diff --git ql/src/test/results/clientpositive/annotate_stats_select.q.out ql/src/test/results/clientpositive/annotate_stats_select.q.out index c103697..fa36622 100644 --- 
ql/src/test/results/clientpositive/annotate_stats_select.q.out +++ ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -352,6 +352,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -490,6 +491,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -625,6 +627,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -918,6 +921,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -1186,6 +1190,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -1454,6 +1459,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -1588,6 +1594,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -2134,6 +2141,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -2270,6 +2278,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -3218,6 +3227,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -3375,6 +3385,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: 
/alltypes_orc [alltypes_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -3685,6 +3696,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [temp:alltypes_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -3857,6 +3869,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [temp:alltypes_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -4031,6 +4044,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [temp:alltypes_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -4204,6 +4218,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [temp:alltypes_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -4382,6 +4397,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [temp:alltypes_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -4575,6 +4591,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [in2:in1:alltypes_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -4818,6 +4835,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -4961,6 +4979,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/annotate_stats_table.q.out ql/src/test/results/clientpositive/annotate_stats_table.q.out index 1d6b628..6eabf44 100644 --- ql/src/test/results/clientpositive/annotate_stats_table.q.out +++ ql/src/test/results/clientpositive/annotate_stats_table.q.out @@ -353,6 +353,7 @@ 
STAGE PLANS: name: default.emp_orc Truncated Path -> Alias: /emp_orc [emp_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -525,6 +526,7 @@ STAGE PLANS: name: default.emp_orc Truncated Path -> Alias: /emp_orc [emp_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -643,6 +645,7 @@ STAGE PLANS: name: default.emp_orc Truncated Path -> Alias: /emp_orc [emp_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -764,6 +767,7 @@ STAGE PLANS: name: default.emp_orc Truncated Path -> Alias: /emp_orc [emp_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/annotate_stats_union.q.out ql/src/test/results/clientpositive/annotate_stats_union.q.out index 5674a0e..53cd78e 100644 Binary files ql/src/test/results/clientpositive/annotate_stats_union.q.out and ql/src/test/results/clientpositive/annotate_stats_union.q.out differ diff --git ql/src/test/results/clientpositive/limit_pushdown.q.out ql/src/test/results/clientpositive/limit_pushdown.q.out index eac59a9..a5a7187 100644 --- ql/src/test/results/clientpositive/limit_pushdown.q.out +++ ql/src/test/results/clientpositive/limit_pushdown.q.out @@ -360,6 +360,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 47154 Data size: 377237 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 + Execution mode: vectorized Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: double) @@ -446,6 +447,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) @@ -535,6 +537,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Statistics: Num rows: 1849 Data size: 377237 Basic stats: COMPLETE Column stats: NONE TopN Hash 
Memory Usage: 0.3 + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) diff --git ql/src/test/results/clientpositive/orc_createas1.q.out ql/src/test/results/clientpositive/orc_createas1.q.out index 6577bf0..b500dd9 100644 --- ql/src/test/results/clientpositive/orc_createas1.q.out +++ ql/src/test/results/clientpositive/orc_createas1.q.out @@ -188,6 +188,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: vectorized Reduce Operator Tree: Extract Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out index 81257ad..1b9b660 100644 --- ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out +++ ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out @@ -833,6 +833,7 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 65 Data size: 19281 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: string) + Execution mode: vectorized Reduce Operator Tree: Extract Statistics: Num rows: 65 Data size: 19281 Basic stats: COMPLETE Column stats: NONE @@ -897,6 +898,7 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 65 Data size: 19281 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: string) + Execution mode: vectorized Reduce Operator Tree: Extract Statistics: Num rows: 65 Data size: 19281 Basic stats: COMPLETE Column stats: NONE