diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 1dde78e..d4e61d8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -92,8 +92,8 @@ protected transient ListBucketingCtx lbCtx; protected transient boolean isSkewedStoredAsSubDirectories; protected transient boolean statsCollectRawDataSize; - private transient boolean[] statsFromRecordWriter; - private transient boolean isCollectRWStats; + protected transient boolean[] statsFromRecordWriter; + protected transient boolean isCollectRWStats; private transient FSPaths prevFsp; private transient FSPaths fpaths; private transient ObjectInspector keyOI; @@ -626,7 +626,7 @@ public void processOp(Object row, int tag) throws HiveException { } } - private boolean areAllTrue(boolean[] statsFromRW) { + protected boolean areAllTrue(boolean[] statsFromRW) { for(boolean b : statsFromRW) { if (!b) { return false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java index c6a7c00..e546dd1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java @@ -145,7 +145,11 @@ public void processOp(Object data, int tag) throws HiveException { } rowOutWriters = fpaths.getOutWriters(); - if (conf.isGatherStats()) { + // check if all record writers implement statistics. if atleast one RW + // doesn't implement stats interface we will fallback to conventional way + // of gathering stats + isCollectRWStats = areAllTrue(statsFromRecordWriter); + if (conf.isGatherStats() && !isCollectRWStats) { if (statsCollectRawDataSize) { SerDeStats stats = serializer.getSerDeStats(); if (stats != null) {