diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index e0e1339..756735b 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -975,7 +975,7 @@ HIVE_USER_INSTALL_DIR("hive.user.install.directory", "hdfs:///user/"), // Vectorization enabled - HIVE_VECTORIZATION_ENABLED("hive.vectorized.execution.enabled", false), + HIVE_VECTORIZATION_ENABLED("hive.vectorized.execution.enabled", true), HIVE_VECTORIZATION_GROUPBY_CHECKINTERVAL("hive.vectorized.groupby.checkinterval", 100000), HIVE_VECTORIZATION_GROUPBY_MAXENTRIES("hive.vectorized.groupby.maxentries", 1000000), HIVE_VECTORIZATION_GROUPBY_FLUSH_PERCENT("hive.vectorized.groupby.flush.percent", (float) 0.1), diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 6af6b2d..d842711 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -93,8 +93,8 @@ protected transient ListBucketingCtx lbCtx; protected transient boolean isSkewedStoredAsSubDirectories; protected transient boolean statsCollectRawDataSize; - private transient boolean[] statsFromRecordWriter; - private transient boolean isCollectRWStats; + protected transient boolean[] statsFromRecordWriter; + protected transient boolean isCollectRWStats; private transient FSPaths prevFsp; private transient FSPaths fpaths; private transient ObjectInspector keyOI; @@ -627,7 +627,7 @@ public void processOp(Object row, int tag) throws HiveException { } } - private boolean areAllTrue(boolean[] statsFromRW) { + protected boolean areAllTrue(boolean[] statsFromRW) { for(boolean b : statsFromRW) { if (!b) { return false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java index c6a7c00..f3e0bc6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java @@ -145,7 +145,12 @@ public void processOp(Object data, int tag) throws HiveException { } rowOutWriters = fpaths.getOutWriters(); - if (conf.isGatherStats()) { + + // check if all record writers implement statistics. if at least one RW + // doesn't implement stats interface we will fall back to the conventional way + // of gathering stats + isCollectRWStats = areAllTrue(statsFromRecordWriter); + if (conf.isGatherStats() && !isCollectRWStats) { if (statsCollectRawDataSize) { SerDeStats stats = serializer.getSerDeStats(); if (stats != null) {