diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcStripeMetadata.java llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcStripeMetadata.java index bc87094..aa58da4 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcStripeMetadata.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcStripeMetadata.java @@ -44,6 +44,7 @@ private OrcIndex rowIndex; private final int estimatedMemUsage; + private final OrcProto.StripeFooter footer; private final static HashMap, ObjectEstimator> SIZE_ESTIMATORS; private final static ObjectEstimator SIZE_ESTIMATOR; @@ -60,7 +61,7 @@ public OrcStripeMetadata(OrcBatchKey stripeKey, DataReader mr, StripeInformation stripe, boolean[] includes, boolean[] sargColumns) throws IOException { this.stripeKey = stripeKey; - OrcProto.StripeFooter footer = mr.readStripeFooter(stripe); + this.footer = mr.readStripeFooter(stripe); streams = footer.getStreamsList(); encodings = footer.getColumnsList(); writerTimezone = footer.getWriterTimezone(); @@ -76,6 +77,7 @@ private OrcStripeMetadata(Object id) { streams = new ArrayList<>(); writerTimezone = ""; rowCount = estimatedMemUsage = 0; + this.footer = null; } @VisibleForTesting @@ -102,8 +104,17 @@ public boolean hasAllIndexes(boolean[] includes) { public void loadMissingIndexes(DataReader mr, StripeInformation stripe, boolean[] includes, boolean[] sargColumns) throws IOException { - // TODO: should we save footer to avoid a read here? - rowIndex = mr.readRowIndex(stripe, null, includes, rowIndex.getRowGroupIndex(), + // Do not lose the old indexes. 
Create a superset of includes + OrcProto.RowIndex[] existing = getRowIndexes(); + boolean superset[] = new boolean[Math.max(existing.length, includes.length)]; + for (int i = 0; i < includes.length; i++) { + superset[i] = includes[i]; + } + for (int i = 0; i < existing.length; i++) { + superset[i] = superset[i] || (existing[i] != null); + } + // reuse footer details + rowIndex = mr.readRowIndex(stripe, footer, superset, rowIndex.getRowGroupIndex(), sargColumns, rowIndex.getBloomFilterIndex()); // TODO: theoretically, we should re-estimate memory usage here and update memory manager }