diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java
index c021daf..df8df25 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java
@@ -16,6 +16,8 @@
 import java.io.IOException;
 import java.util.Properties;
 
+import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter;
+import org.apache.hadoop.hive.serde2.SerDeStats;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -37,12 +39,13 @@ import org.apache.parquet.hadoop.util.ContextUtil;
 
 public class ParquetRecordWriterWrapper implements RecordWriter,
-  org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter {
-
+  org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter, StatsProvidingRecordWriter {
+
   public static final Logger LOG = LoggerFactory.getLogger(ParquetRecordWriterWrapper.class);
 
   private final org.apache.hadoop.mapreduce.RecordWriter realWriter;
   private final TaskAttemptContext taskContext;
+  private final SerDeStats stats;
 
   public ParquetRecordWriterWrapper(
     final OutputFormat realOutputFormat,
     final JobConf jobConf,
@@ -67,6 +70,8 @@ public ParquetRecordWriterWrapper(
       realWriter =
         ((ParquetOutputFormat) realOutputFormat).getRecordWriter(taskContext, new Path(name));
       LOG.info("real writer: " + realWriter);
+      this.stats = new SerDeStats();
+
     } catch (final InterruptedException e) {
       throw new IOException(e);
     }
@@ -125,4 +130,22 @@ public void write(final Writable w) throws IOException {
     write(null, (ParquetHiveRecord) w);
   }
 
+  @Override
+  public SerDeStats getStats() {
+    long rawDataSize = getRawDataSize();
+    long numberOfRows = getNumberOfRows();
+    stats.setRawDataSize(null == realWriter ? 0 : rawDataSize);
+    stats.setRowCount(null == realWriter ? 0 : numberOfRows);
+    return stats;
+  }
+
+  public long getRawDataSize() {
+    // TODO: report the real size once org.apache.parquet.hadoop.ParquetRecordWriter supports getRawDataSize()
+    return 1;
+  }
+
+  public long getNumberOfRows() {
+    // TODO: report the real count once org.apache.parquet.hadoop.ParquetRecordWriter supports getNumberOfRows()
+    return 1;
+  }
 }
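
The two TODO methods above return a placeholder of 1 until org.apache.parquet.hadoop.ParquetRecordWriter exposes real size and row-count accessors. In the meantime, the wrapper could at least report an accurate row count by counting successful writes itself, since every Hive row passes through its write(Void, ParquetHiveRecord) method. A minimal sketch of that variant (the rowCount field is hypothetical, not part of this patch):

  // Hypothetical addition to ParquetRecordWriterWrapper: count rows as they
  // pass through, instead of returning the placeholder value 1.
  private long rowCount = 0;

  @Override
  public void write(final Void key, final ParquetHiveRecord value) throws IOException {
    try {
      realWriter.write(key, value);
      rowCount++;  // one Hive row per successful write
    } catch (final InterruptedException e) {
      throw new IOException(e);
    }
  }

  public long getNumberOfRows() {
    return rowCount;  // replaces the placeholder return of 1
  }

Raw data size is harder to approximate from the wrapper, because the serialized size is only known inside the Parquet writer, so that placeholder would have to remain until the upstream accessor exists.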
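
On the consumer side, the point of implementing StatsProvidingRecordWriter is that callers holding a plain FileSinkOperator.RecordWriter can detect the capability with an instanceof check and read the numbers back after the rows have been written. A sketch of that caller-side pattern (WriterStatsProbe and logStats are hypothetical names, used only for illustration):

  import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
  import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter;
  import org.apache.hadoop.hive.serde2.SerDeStats;

  public final class WriterStatsProbe {
    private WriterStatsProbe() {}

    // Ask a writer for its stats if, and only if, it advertises the capability.
    public static void logStats(final FileSinkOperator.RecordWriter outWriter) {
      if (outWriter instanceof StatsProvidingRecordWriter) {
        final SerDeStats stats = ((StatsProvidingRecordWriter) outWriter).getStats();
        System.out.println("rows=" + stats.getRowCount()
            + " rawDataSize=" + stats.getRawDataSize());
      }
    }
  }

This mirrors how Hive already consumes the ORC writer's stats, which is why implementing the interface on the Parquet wrapper, even with placeholder values for now, is enough to plug it into the existing stats collection flow.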