diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 604bea7..fb11065 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -979,7 +979,7 @@ HIVE_USER_INSTALL_DIR("hive.user.install.directory", "hdfs:///user/"), // Vectorization enabled - HIVE_VECTORIZATION_ENABLED("hive.vectorized.execution.enabled", false), + HIVE_VECTORIZATION_ENABLED("hive.vectorized.execution.enabled", true), HIVE_VECTORIZATION_GROUPBY_CHECKINTERVAL("hive.vectorized.groupby.checkinterval", 100000), HIVE_VECTORIZATION_GROUPBY_MAXENTRIES("hive.vectorized.groupby.maxentries", 1000000), HIVE_VECTORIZATION_GROUPBY_FLUSH_PERCENT("hive.vectorized.groupby.flush.percent", (float) 0.1), diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 1dde78e..d4e61d8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -92,8 +92,8 @@ protected transient ListBucketingCtx lbCtx; protected transient boolean isSkewedStoredAsSubDirectories; protected transient boolean statsCollectRawDataSize; - private transient boolean[] statsFromRecordWriter; - private transient boolean isCollectRWStats; + protected transient boolean[] statsFromRecordWriter; + protected transient boolean isCollectRWStats; private transient FSPaths prevFsp; private transient FSPaths fpaths; private transient ObjectInspector keyOI; @@ -626,7 +626,7 @@ public void processOp(Object row, int tag) throws HiveException { } } - private boolean areAllTrue(boolean[] statsFromRW) { + protected boolean areAllTrue(boolean[] statsFromRW) { for(boolean b : statsFromRW) { if (!b) { return false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java index c6a7c00..f3e0bc6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java @@ -145,7 +145,12 @@ public void processOp(Object data, int tag) throws HiveException { } rowOutWriters = fpaths.getOutWriters(); - if (conf.isGatherStats()) { + + // check if all record writers implement statistics. if at least one RW + // doesn't implement stats interface we will fall back to conventional way + // of gathering stats + isCollectRWStats = areAllTrue(statsFromRecordWriter); + if (conf.isGatherStats() && !isCollectRWStats) { if (statsCollectRawDataSize) { SerDeStats stats = serializer.getSerDeStats(); if (stats != null) { diff --git ql/src/test/results/clientpositive/annotate_stats_filter.q.out ql/src/test/results/clientpositive/annotate_stats_filter.q.out index 50335ec..bdc1fc9 100644 --- ql/src/test/results/clientpositive/annotate_stats_filter.q.out +++ ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -219,6 +219,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -363,6 +364,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -495,6 +497,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -623,6 +626,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -755,6 +759,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -886,6 +891,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc 
[loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -1018,6 +1024,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -1149,6 +1156,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -1544,6 +1552,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -1681,6 +1690,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -1821,6 +1831,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -1963,6 +1974,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -2105,6 +2117,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -2237,6 +2250,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -2365,6 +2379,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -2493,6 +2508,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -2621,6 +2637,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/annotate_stats_groupby.q.out ql/src/test/results/clientpositive/annotate_stats_groupby.q.out index 26cf65b..ad844bd 100644 
--- ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -267,6 +267,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [sq1:loc_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -504,6 +505,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -658,6 +660,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -1747,6 +1750,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/annotate_stats_join.q.out ql/src/test/results/clientpositive/annotate_stats_join.q.out index b36e3b7..7c1723b 100644 --- ql/src/test/results/clientpositive/annotate_stats_join.q.out +++ ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -397,6 +397,7 @@ STAGE PLANS: Truncated Path -> Alias: /dept_orc [d] /emp_orc [e] + Execution mode: vectorized Needs Tagging: true Reduce Operator Tree: Join Operator @@ -641,6 +642,7 @@ STAGE PLANS: Truncated Path -> Alias: /dept_orc [d] /emp_orc [e1, e] + Execution mode: vectorized Needs Tagging: true Reduce Operator Tree: Join Operator @@ -932,6 +934,7 @@ STAGE PLANS: /dept_orc [d] /emp_orc [e] /loc_orc [l] + Execution mode: vectorized Needs Tagging: true Reduce Operator Tree: Join Operator @@ -1225,6 +1228,7 @@ STAGE PLANS: /dept_orc [d] /emp_orc [e] /loc_orc [l] + Execution mode: vectorized Needs Tagging: true Reduce Operator Tree: Join Operator @@ -1456,6 +1460,7 @@ STAGE PLANS: Truncated Path -> Alias: /dept_orc [d] /emp_orc [e] + Execution mode: vectorized Needs Tagging: true Reduce 
Operator Tree: Join Operator @@ -1767,6 +1772,7 @@ STAGE PLANS: /dept_orc [d] /emp_orc [e] /loc_orc [l] + Execution mode: vectorized Needs Tagging: true Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/annotate_stats_part.q.out ql/src/test/results/clientpositive/annotate_stats_part.q.out index eee003b..a67ec56 100644 --- ql/src/test/results/clientpositive/annotate_stats_part.q.out +++ ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -1210,6 +1210,7 @@ STAGE PLANS: Truncated Path -> Alias: /loc_orc/year=2001 [loc_orc] /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -1379,6 +1380,7 @@ STAGE PLANS: Truncated Path -> Alias: /loc_orc/year=2001 [loc_orc] /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -1553,6 +1555,7 @@ STAGE PLANS: Truncated Path -> Alias: /loc_orc/year=2001 [loc_orc] /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -1684,6 +1687,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc/year=2001 [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -1815,6 +1819,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -2087,6 +2092,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc/year=2001 [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -2225,6 +2231,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc/year=2001 [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -2374,6 +2381,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc/year=2001 [test:loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git 
ql/src/test/results/clientpositive/annotate_stats_select.q.out ql/src/test/results/clientpositive/annotate_stats_select.q.out index d1c5be6..e6c156b 100644 --- ql/src/test/results/clientpositive/annotate_stats_select.q.out +++ ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -352,6 +352,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -492,6 +493,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -629,6 +631,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -926,6 +929,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -1198,6 +1202,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -1470,6 +1475,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -1606,6 +1612,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -2160,6 +2167,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -2298,6 +2306,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -3260,6 +3269,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: 
vectorized Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -3419,6 +3429,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -3733,6 +3744,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [temp:alltypes_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -3907,6 +3919,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [temp:alltypes_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -4083,6 +4096,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [temp:alltypes_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -4258,6 +4272,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [temp:alltypes_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -4438,6 +4453,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [temp:alltypes_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -4633,6 +4649,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [in2:in1:alltypes_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -4878,6 +4895,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -5023,6 +5041,7 @@ STAGE PLANS: name: default.alltypes_orc Truncated Path -> Alias: /alltypes_orc [alltypes_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/annotate_stats_table.q.out ql/src/test/results/clientpositive/annotate_stats_table.q.out index 8de1e9d..85eb601 
100644 --- ql/src/test/results/clientpositive/annotate_stats_table.q.out +++ ql/src/test/results/clientpositive/annotate_stats_table.q.out @@ -353,6 +353,7 @@ STAGE PLANS: name: default.emp_orc Truncated Path -> Alias: /emp_orc [emp_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -527,6 +528,7 @@ STAGE PLANS: name: default.emp_orc Truncated Path -> Alias: /emp_orc [emp_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -647,6 +649,7 @@ STAGE PLANS: name: default.emp_orc Truncated Path -> Alias: /emp_orc [emp_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -770,6 +773,7 @@ STAGE PLANS: name: default.emp_orc Truncated Path -> Alias: /emp_orc [emp_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/annotate_stats_union.q.out ql/src/test/results/clientpositive/annotate_stats_union.q.out index 75ff252..6300740 100644 --- ql/src/test/results/clientpositive/annotate_stats_union.q.out +++ ql/src/test/results/clientpositive/annotate_stats_union.q.out @@ -177,6 +177,7 @@ STAGE PLANS: name: default.loc_orc Truncated Path -> Alias: /loc_orc [loc_orc] + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/limit_pushdown.q.out ql/src/test/results/clientpositive/limit_pushdown.q.out index 5678d53..8c01088 100644 --- ql/src/test/results/clientpositive/limit_pushdown.q.out +++ ql/src/test/results/clientpositive/limit_pushdown.q.out @@ -368,6 +368,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 47154 Data size: 377237 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 + Execution mode: vectorized Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: double) @@ -456,6 +457,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 + 
Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) @@ -547,6 +549,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Statistics: Num rows: 1849 Data size: 377237 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) diff --git ql/src/test/results/clientpositive/orc_createas1.q.out ql/src/test/results/clientpositive/orc_createas1.q.out index 8016a53..afec803 100644 --- ql/src/test/results/clientpositive/orc_createas1.q.out +++ ql/src/test/results/clientpositive/orc_createas1.q.out @@ -188,6 +188,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: vectorized Reduce Operator Tree: Extract Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out index 5ee5905..f6d4a18 100644 --- ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out +++ ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out @@ -845,6 +845,7 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 65 Data size: 19281 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: string) + Execution mode: vectorized Reduce Operator Tree: Extract Statistics: Num rows: 65 Data size: 19281 Basic stats: COMPLETE Column stats: NONE @@ -911,6 +912,7 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 65 Data size: 19281 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: string) + Execution mode: vectorized Reduce Operator Tree: Extract Statistics: Num rows: 65 Data 
size: 19281 Basic stats: COMPLETE Column stats: NONE