diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
index 8f17c12..b7ec309 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
@@ -88,6 +88,9 @@
   private final IntWritable bucket = new IntWritable();
   private final LongWritable rowId = new LongWritable();
   private long insertedRows = 0;
+  // This records how many rows have been inserted or deleted. It is separate from insertedRows
+  // because that is monotonically increasing to give new unique row ids.
+  private long rowCountDelta = 0;
   private final KeyIndexBuilder indexBuilder = new KeyIndexBuilder();
 
   static class AcidStats {
@@ -263,6 +266,7 @@ public void insert(long currentTransaction, Object row) throws IOException {
     }
     addEvent(INSERT_OPERATION, currentTransaction, currentTransaction,
         insertedRows++, row);
+    rowCountDelta++;
   }
 
   @Override
@@ -283,6 +287,7 @@ public void delete(long currentTransaction, long originalTransaction,
     }
     addEvent(DELETE_OPERATION, currentTransaction, originalTransaction,
         rowId, null);
+    rowCountDelta--;
   }
 
   @Override
@@ -317,7 +322,11 @@ public void close(boolean abort) throws IOException {
 
   @Override
   public SerDeStats getStats() {
-    return null;
+    SerDeStats stats = new SerDeStats();
+    stats.setRowCount(rowCountDelta);
+    // Don't worry about setting raw data size diff. I have no idea how to calculate that
+    // without finding the row we are updating or deleting, which would be a mess.
+    return stats;
   }
 
   @VisibleForTesting
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRecordUpdater.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRecordUpdater.java
index b53bd85..db553f5 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRecordUpdater.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRecordUpdater.java
@@ -24,6 +24,7 @@
 import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.RecordUpdater;
+import org.apache.hadoop.hive.serde2.SerDeStats;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.io.IntWritable;
@@ -95,6 +96,10 @@ public void testWriter() throws Exception {
     updater.insert(12, new MyRow("fourth"));
     updater.insert(12, new MyRow("fifth"));
     updater.flush();
+
+    // Check the stats
+    assertEquals(5L, updater.getStats().getRowCount());
+
     Path bucketPath = AcidUtils.createFilename(root, options);
     Path sidePath = OrcRecordUpdater.getSideFile(bucketPath);
     DataInputStream side = fs.open(sidePath);
@@ -158,6 +163,8 @@
     reader = OrcFile.createReader(bucketPath,
         new OrcFile.ReaderOptions(conf).filesystem(fs));
     assertEquals(6, reader.getNumberOfRows());
+    assertEquals(6L, updater.getStats().getRowCount());
+
     assertEquals(false, fs.exists(sidePath));
   }
 
@@ -182,6 +189,7 @@ public void testUpdates() throws Exception {
     RecordUpdater updater = new OrcRecordUpdater(root, options);
     updater.update(100, 10, 30, new MyRow("update"));
     updater.delete(100, 40, 60);
+    assertEquals(-1L, updater.getStats().getRowCount());
     updater.close(false);
 
     Path bucketPath = AcidUtils.createFilename(root, options);
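
For reviewers who want the counting scheme in isolation: below is a minimal standalone sketch (a hypothetical RowCountDeltaSketch class, not Hive code) of the behavior this patch gives OrcRecordUpdater.getStats(). Inserts increment the delta, deletes decrement it, and updates leave it alone because the rewritten row still exists afterward; insertedRows stays a separate, monotonically increasing counter because it doubles as the source of fresh unique row ids.

    // Hypothetical sketch only; the class and method names here are illustrative, not Hive's API.
    public class RowCountDeltaSketch {
      private long insertedRows = 0;   // monotonic: also hands out new unique row ids
      private long rowCountDelta = 0;  // net change in live rows, reported by getRowCount()

      public long insert() { rowCountDelta++; return insertedRows++; }
      public void delete() { rowCountDelta--; }
      public void update() { /* rewrites an existing row, so the live-row count is unchanged */ }

      public long getRowCount() { return rowCountDelta; }

      public static void main(String[] args) {
        RowCountDeltaSketch s = new RowCountDeltaSketch();
        s.update();                          // like updater.update(100, 10, 30, ...) in testUpdates
        s.delete();                          // like updater.delete(100, 40, 60) in testUpdates
        System.out.println(s.getRowCount()); // prints -1, matching the testUpdates assertion
      }
    }

This is also why the test assertions differ: testWriter expects 5 after five inserts, while testUpdates expects -1 from one update (net zero) plus one delete (minus one).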