diff --git a/itests/hive-unit/pom.xml b/itests/hive-unit/pom.xml
index 1a07651..b241daa 100644
--- a/itests/hive-unit/pom.xml
+++ b/itests/hive-unit/pom.xml
@@ -359,6 +359,12 @@
true
+ org.apache.hadoop
+ hadoop-yarn-api
+ ${hadoop.version}
+ test
+
+
org.apache.curator
curator-test
${curator.version}
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index c4ba277..91d6cd6 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -493,6 +493,7 @@ minillap.shared.query.files=bucket_map_join_tez1.q,\
llap_nullscan.q,\
mrr.q,\
orc_ppd_basic.q,\
+ orc_llap_counters.q,\
tez_bmj_schema_evolution.q,\
tez_dml.q,\
tez_fsstat.q,\
diff --git a/llap-common/src/java/org/apache/hadoop/hive/llap/LlapUtil.java b/llap-common/src/java/org/apache/hadoop/hive/llap/LlapUtil.java
index 9dcacea..61d35d8 100644
--- a/llap-common/src/java/org/apache/hadoop/hive/llap/LlapUtil.java
+++ b/llap-common/src/java/org/apache/hadoop/hive/llap/LlapUtil.java
@@ -14,9 +14,12 @@
package org.apache.hadoop.hive.llap;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.security.SecurityUtil;
@@ -49,4 +52,79 @@ public static String generateClusterName(Configuration conf) {
String hosts = HiveConf.getTrimmedVar(conf, ConfVars.LLAP_DAEMON_SERVICE_HOSTS);
return hostsRe.matcher(hosts.startsWith("@") ? hosts.substring(1) : hosts).replaceAll("_");
}
+
+ public static StatisticsData getStatisticsForScheme(final String scheme,
+ final List<StatisticsData> stats) {
+ if (stats != null && scheme != null) {
+ for (StatisticsData s : stats) {
+ if (s.getScheme().equalsIgnoreCase(scheme)) {
+ return s;
+ }
+ }
+ }
+ return null;
+ }
+
+ public static List<StatisticsData> cloneThreadLocalFileSystemStatistics() {
+ List<StatisticsData> result = new ArrayList<>();
+ // FileSystem's thread-local stats object is private and cannot be cloned, so copy it into our own class
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ result.add(new StatisticsData(statistics.getScheme(), statistics.getThreadStatistics()));
+ }
+ return result;
+ }
+
+ public static class StatisticsData {
+ long bytesRead;
+ long bytesWritten;
+ int readOps;
+ int largeReadOps;
+ int writeOps;
+ String scheme;
+
+ public StatisticsData(String scheme, FileSystem.Statistics.StatisticsData fsStats) {
+ this.scheme = scheme;
+ this.bytesRead = fsStats.getBytesRead();
+ this.bytesWritten = fsStats.getBytesWritten();
+ this.readOps = fsStats.getReadOps();
+ this.largeReadOps = fsStats.getLargeReadOps();
+ this.writeOps = fsStats.getWriteOps();
+ }
+
+ public long getBytesRead() {
+ return bytesRead;
+ }
+
+ public long getBytesWritten() {
+ return bytesWritten;
+ }
+
+ public int getReadOps() {
+ return readOps;
+ }
+
+ public int getLargeReadOps() {
+ return largeReadOps;
+ }
+
+ public int getWriteOps() {
+ return writeOps;
+ }
+
+ public String getScheme() {
+ return scheme;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append(" scheme: ").append(scheme);
+ sb.append(" bytesRead: ").append(bytesRead);
+ sb.append(" bytesWritten: ").append(bytesWritten);
+ sb.append(" readOps: ").append(readOps);
+ sb.append(" largeReadOps: ").append(largeReadOps);
+ sb.append(" writeOps: ").append(writeOps);
+ return sb.toString();
+ }
+ }
}
diff --git a/llap-common/src/java/org/apache/hadoop/hive/llap/counters/LlapIOCounters.java b/llap-common/src/java/org/apache/hadoop/hive/llap/counters/LlapIOCounters.java
index 365ddab..1ed23ba 100644
--- a/llap-common/src/java/org/apache/hadoop/hive/llap/counters/LlapIOCounters.java
+++ b/llap-common/src/java/org/apache/hadoop/hive/llap/counters/LlapIOCounters.java
@@ -15,23 +15,43 @@
*/
package org.apache.hadoop.hive.llap.counters;
+import java.util.ArrayList;
+import java.util.List;
+
/**
* LLAP IO related counters.
*/
public enum LlapIOCounters {
- NUM_VECTOR_BATCHES,
- NUM_DECODED_BATCHES,
- SELECTED_ROWGROUPS,
- NUM_ERRORS,
- ROWS_EMITTED,
- METADATA_CACHE_HIT,
- METADATA_CACHE_MISS,
- CACHE_HIT_BYTES,
- CACHE_MISS_BYTES,
- ALLOCATED_BYTES,
- ALLOCATED_USED_BYTES,
- TOTAL_IO_TIME_NS,
- DECODE_TIME_NS,
- HDFS_TIME_NS,
- CONSUMER_TIME_NS
+ NUM_VECTOR_BATCHES(true),
+ NUM_DECODED_BATCHES(true),
+ SELECTED_ROWGROUPS(true),
+ NUM_ERRORS(true),
+ ROWS_EMITTED(true),
+ METADATA_CACHE_HIT(true),
+ METADATA_CACHE_MISS(true),
+ CACHE_HIT_BYTES(true),
+ CACHE_MISS_BYTES(true),
+ ALLOCATED_BYTES(true),
+ ALLOCATED_USED_BYTES(true),
+ TOTAL_IO_TIME_NS(false),
+ DECODE_TIME_NS(false),
+ HDFS_TIME_NS(false),
+ CONSUMER_TIME_NS(false);
+
+ // true if the counter's value is stable (safe to compare) across different test runs
+ private boolean testSafe;
+
+ LlapIOCounters(final boolean testSafe) {
+ this.testSafe = testSafe;
+ }
+
+ public static List<String> testSafeCounterNames() {
+ List<String> testSafeCounters = new ArrayList<>();
+ for (LlapIOCounters counter : values()) {
+ if (counter.testSafe) {
+ testSafeCounters.add(counter.name());
+ }
+ }
+ return testSafeCounters;
+ }
}
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/counters/QueryFragmentCounters.java b/llap-server/src/java/org/apache/hadoop/hive/llap/counters/QueryFragmentCounters.java
index a53ac61..0c858eb 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/counters/QueryFragmentCounters.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/counters/QueryFragmentCounters.java
@@ -135,4 +135,8 @@ public String toString() {
sb.append(" ]");
return sb.toString();
}
+
+ public TezCounters getTezCounters() {
+ return tezCounters;
+ }
}
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/StatsRecordingThreadPool.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/StatsRecordingThreadPool.java
new file mode 100644
index 0000000..4fb1e11
--- /dev/null
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/StatsRecordingThreadPool.java
@@ -0,0 +1,174 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.llap.daemon.impl;
+
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.FutureTask;
+import java.util.concurrent.RunnableFuture;
+import java.util.concurrent.ThreadFactory;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hive.llap.LlapUtil;
+import org.apache.hadoop.hive.llap.counters.LlapIOCounters;
+import org.apache.hadoop.hive.llap.io.encoded.OrcEncodedDataReader;
+import org.apache.tez.common.counters.FileSystemCounter;
+import org.apache.tez.common.counters.TezCounters;
+import org.apache.tez.runtime.task.TaskRunner2Callable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Custom thread pool implementation that records per thread file system statistics in TezCounters.
+ */
+public class StatsRecordingThreadPool extends ThreadPoolExecutor {
+ private static final Logger LOG = LoggerFactory.getLogger(StatsRecordingThreadPool.class);
+ // map that stores snapshot of FileSystem's thread local stats object before thread execution
+ private Map<Long, List<LlapUtil.StatisticsData>> threadsStatsBefore;
+ // uncaught exception handler that will be set for all threads before execution
+ private Thread.UncaughtExceptionHandler uncaughtExceptionHandler;
+
+ public StatsRecordingThreadPool(final int corePoolSize, final int maximumPoolSize,
+ final long keepAliveTime,
+ final TimeUnit unit,
+ final BlockingQueue<Runnable> workQueue,
+ final ThreadFactory threadFactory) {
+ this(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue, threadFactory, null);
+ }
+
+ public StatsRecordingThreadPool(final int corePoolSize, final int maximumPoolSize,
+ final long keepAliveTime,
+ final TimeUnit unit,
+ final BlockingQueue<Runnable> workQueue,
+ final ThreadFactory threadFactory, Thread.UncaughtExceptionHandler handler) {
+ super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue, threadFactory);
+ this.threadsStatsBefore = new ConcurrentHashMap<>(corePoolSize);
+ this.uncaughtExceptionHandler = handler;
+ }
+
+ @Override
+ protected <T> RunnableFuture<T> newTaskFor(final Callable<T> callable) {
+ return new WrappedFutureTask<>(callable);
+ }
+
+ @Override
+ protected void beforeExecute(final Thread t, final Runnable r) {
+ // setup uncaught exception handler for thread
+ if (uncaughtExceptionHandler != null) {
+ t.setUncaughtExceptionHandler(uncaughtExceptionHandler);
+ }
+ // clone thread local file system statistics
+ threadsStatsBefore.put(t.getId(), LlapUtil.cloneThreadLocalFileSystemStatistics());
+ super.beforeExecute(t, r);
+ }
+
+ @Override
+ protected void afterExecute(final Runnable r, final Throwable t) {
+ Thread thread = Thread.currentThread();
+ if (r instanceof WrappedFutureTask) {
+ Callable<?> wrappedCallable = ((WrappedFutureTask<?>) r).getWrappedCallable();
+ TezCounters tezCounters = null;
+ // add tez counters for task execution and llap io
+ if (wrappedCallable instanceof TaskRunner2Callable) {
+ TaskRunner2Callable taskRunner2Callable = (TaskRunner2Callable) wrappedCallable;
+ // counters for task execution side
+ tezCounters = taskRunner2Callable.addAndGetTezCounter(LlapIOCounters.class.getName());
+ } else if (wrappedCallable instanceof OrcEncodedDataReader) {
+ // counters for llap io side
+ tezCounters = ((OrcEncodedDataReader) wrappedCallable).getTezCounters();
+ }
+
+ if (tezCounters != null) {
+ List<LlapUtil.StatisticsData> statsBefore = threadsStatsBefore.get(thread.getId());
+ if (statsBefore != null) {
+ List<FileSystem.Statistics> allStatistics = FileSystem.getAllStatistics();
+ for (FileSystem.Statistics statistics : allStatistics) {
+ FileSystem.Statistics.StatisticsData threadFSStats = statistics.getThreadStatistics();
+ LlapUtil.StatisticsData sb = LlapUtil
+ .getStatisticsForScheme(statistics.getScheme(), statsBefore);
+ final long bytesReadDelta;
+ final long bytesWrittenDelta;
+ final long readOpsDelta;
+ final long largeReadOpsDelta;
+ final long writeOpsDelta;
+ // there could be more schemes after execution, since the task might have accessed a
+ // different filesystem. If no matching scheme existed before execution, we use the
+ // after-execution values directly instead of computing a delta
+ if (sb != null) {
+ bytesReadDelta = threadFSStats.getBytesRead() - sb.getBytesRead();
+ bytesWrittenDelta = threadFSStats.getBytesWritten() - sb.getBytesWritten();
+ readOpsDelta = threadFSStats.getReadOps() - sb.getReadOps();
+ largeReadOpsDelta = threadFSStats.getLargeReadOps() - sb.getLargeReadOps();
+ writeOpsDelta = threadFSStats.getWriteOps() - sb.getWriteOps();
+ } else {
+ bytesReadDelta = threadFSStats.getBytesRead();
+ bytesWrittenDelta = threadFSStats.getBytesWritten();
+ readOpsDelta = threadFSStats.getReadOps();
+ largeReadOpsDelta = threadFSStats.getLargeReadOps();
+ writeOpsDelta = threadFSStats.getWriteOps();
+ }
+ tezCounters.findCounter(statistics.getScheme(), FileSystemCounter.BYTES_READ)
+ .increment(bytesReadDelta);
+ tezCounters.findCounter(statistics.getScheme(), FileSystemCounter.BYTES_WRITTEN)
+ .increment(bytesWrittenDelta);
+ tezCounters.findCounter(statistics.getScheme(), FileSystemCounter.READ_OPS)
+ .increment(readOpsDelta);
+ tezCounters.findCounter(statistics.getScheme(), FileSystemCounter.LARGE_READ_OPS)
+ .increment(largeReadOpsDelta);
+ tezCounters.findCounter(statistics.getScheme(), FileSystemCounter.WRITE_OPS)
+ .increment(writeOpsDelta);
+ }
+ }
+ } else {
+ LOG.warn("TezCounters is null for callable type: {}",
+ wrappedCallable.getClass().getSimpleName());
+ }
+ }
+
+ // remove the stored snapshot of file system statistics
+ threadsStatsBefore.remove(thread.getId());
+ super.afterExecute(r, t);
+ }
+
+ public void setUncaughtExceptionHandler(Thread.UncaughtExceptionHandler handler) {
+ this.uncaughtExceptionHandler = handler;
+ }
+
+ /**
+ * Wrapped future task that provides access to the callable that it wraps.
+ *
+ * @param <V> result type returned by the wrapped callable
+ */
+ private static class WrappedFutureTask<V> extends FutureTask<V> {
+ private Callable<V> internalCallable;
+
+ WrappedFutureTask(final Callable<V> callable) {
+ super(callable);
+ this.internalCallable = callable;
+ }
+
+ Callable<V> getWrappedCallable() {
+ return internalCallable;
+ }
+ }
+}
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
index 0d9882b..394c67f 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
@@ -23,7 +23,9 @@
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
@@ -46,6 +48,7 @@
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.tez.common.CallableWithNdc;
import org.apache.tez.common.TezCommonUtils;
import org.apache.tez.common.security.JobTokenIdentifier;
@@ -99,7 +102,7 @@
private final FragmentCompletionHandler fragmentCompletionHanler;
private volatile TezTaskRunner2 taskRunner;
private volatile TaskReporterInterface taskReporter;
- private volatile ListeningExecutorService executor;
+ private volatile ExecutorService executor;
private LlapTaskUmbilicalProtocol umbilical;
private volatile long startTime;
private volatile String threadName;
@@ -179,12 +182,14 @@ protected TaskRunner2Result callInternal() throws Exception {
}
// TODO This executor seems unnecessary. Here and TezChild
- ExecutorService executorReal = Executors.newFixedThreadPool(1,
+ ThreadPoolExecutor customExecutor = new StatsRecordingThreadPool(1, 1,
+ 0L, TimeUnit.MILLISECONDS,
+ new LinkedBlockingQueue<Runnable>(),
new ThreadFactoryBuilder()
.setDaemon(true)
.setNameFormat("TezTaskRunner")
.build());
- executor = MoreExecutors.listeningDecorator(executorReal);
+ executor = customExecutor;
// TODO Consolidate this code with TezChild.
runtimeWatch.start();
@@ -210,12 +215,7 @@ public LlapTaskUmbilicalProtocol run() throws Exception {
}
});
- TezTaskAttemptID taskAttemptID = taskSpec.getTaskAttemptID();
- TezTaskID taskId = taskAttemptID.getTaskID();
- TezVertexID tezVertexID = taskId.getVertexID();
- TezDAGID tezDAGID = tezVertexID.getDAGId();
- String fragFullId = Joiner.on('_').join(tezDAGID.getId(), tezVertexID.getId(), taskId.getId(),
- taskAttemptID.getId());
+ String fragFullId = taskSpec.getTaskAttemptID().toString();
taskReporter = new LlapTaskReporter(
umbilical,
confParams.amHeartbeatIntervalMsMax,
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
index 298f788..2a785ae 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
@@ -22,16 +22,18 @@
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
+import java.util.concurrent.ExecutorService;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.llap.ConsumerFeedback;
-import org.apache.hadoop.hive.llap.DebugUtils;
import org.apache.hadoop.hive.llap.counters.FragmentCountersMap;
import org.apache.hadoop.hive.llap.counters.LlapIOCounters;
import org.apache.hadoop.hive.llap.counters.QueryFragmentCounters;
+import org.apache.hadoop.hive.llap.daemon.impl.StatsRecordingThreadPool;
import org.apache.hadoop.hive.llap.io.decode.ColumnVectorProducer;
import org.apache.hadoop.hive.llap.io.decode.ReadPipeline;
+import org.apache.hadoop.hive.llap.tezplugins.LlapTezUtils;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -58,11 +60,6 @@
import org.slf4j.LoggerFactory;
import com.google.common.base.Joiner;
-import com.google.common.base.Preconditions;
-import com.google.common.util.concurrent.FutureCallback;
-import com.google.common.util.concurrent.Futures;
-import com.google.common.util.concurrent.ListenableFuture;
-import com.google.common.util.concurrent.ListeningExecutorService;
public class LlapInputFormat implements InputFormat,
VectorizedInputFormatInterface, SelfDescribingInputFormatInterface,
@@ -71,12 +68,12 @@
private final InputFormat sourceInputFormat;
private final AvoidSplitCombination sourceASC;
private final ColumnVectorProducer cvp;
- private final ListeningExecutorService executor;
+ private final ExecutorService executor;
private final String hostName;
@SuppressWarnings("rawtypes")
LlapInputFormat(InputFormat sourceInputFormat, ColumnVectorProducer cvp,
- ListeningExecutorService executor) {
+ ExecutorService executor) {
// TODO: right now, we do nothing with source input format, ORC-only in the first cut.
// We'd need to plumb it thru and use it to get data to cache/etc.
assert sourceInputFormat instanceof OrcInputFormat;
@@ -153,19 +150,13 @@ public LlapRecordReader(
this.columnIds = includedCols;
this.sarg = ConvertAstToSearchArg.createFromConf(job);
this.columnNames = ColumnProjectionUtils.getReadColumnNames(job);
- String dagId = job.get("tez.mapreduce.dag.index");
- String vertexId = job.get("tez.mapreduce.vertex.index");
- String taskId = job.get("tez.mapreduce.task.index");
- String taskAttemptId = job.get("tez.mapreduce.task.attempt.index");
+ String appId = LlapTezUtils.getApplicationIdString(job);
TezCounters taskCounters = null;
- if (dagId != null && vertexId != null && taskId != null && taskAttemptId != null) {
- String fullId = Joiner.on('_').join(dagId, vertexId, taskId, taskAttemptId);
- taskCounters = FragmentCountersMap.getCountersForFragment(fullId);
- LOG.info("Received dagid_vertexid_taskid_attempid: {}", fullId);
+ if (appId != null) {
+ taskCounters = FragmentCountersMap.getCountersForFragment(appId);
+ LOG.info("Received full application id: {}", appId);
} else {
- LOG.warn("Not using tez counters as some identifier is null." +
- " dagId: {} vertexId: {} taskId: {} taskAttempId: {}",
- dagId, vertexId, taskId, taskAttemptId);
+ LOG.warn("Not using tez counters as application id string is null");
}
this.counters = new QueryFragmentCounters(job, taskCounters);
this.counters.setDesc(QueryFragmentCounters.Desc.MACHINE, hostName);
@@ -233,17 +224,12 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti
}
- private final class UncaughtErrorHandler implements FutureCallback {
+ private final class IOUncaughtExceptionHandler implements Thread.UncaughtExceptionHandler {
@Override
- public void onSuccess(Void result) {
- // Successful execution of reader is supposed to call setDone.
- }
-
- @Override
- public void onFailure(Throwable t) {
- // Reader is not supposed to throw AFTER calling setError.
- LlapIoImpl.LOG.error("Unhandled error from reader thread " + t.getMessage());
- setError(t);
+ public void uncaughtException(final Thread t, final Throwable e) {
+ LlapIoImpl.LOG.error("Unhandled error from reader thread. threadName: {} threadId: {}" +
+ " Message: {}", t.getName(), t.getId(), e.getMessage());
+ setError(e);
}
}
@@ -252,9 +238,12 @@ private void startRead() {
ReadPipeline rp = cvp.createReadPipeline(
this, split, columnIds, sarg, columnNames, counters);
feedback = rp;
- ListenableFuture future = executor.submit(rp.getReadCallable());
- // TODO: we should NOT do this thing with handler. Reader needs to do cleanup in most cases.
- Futures.addCallback(future, new UncaughtErrorHandler());
+ if (executor instanceof StatsRecordingThreadPool) {
+ // Every thread created by this thread pool will use the same handler
+ ((StatsRecordingThreadPool) executor)
+ .setUncaughtExceptionHandler(new IOUncaughtExceptionHandler());
+ }
+ executor.submit(rp.getReadCallable());
}
ColumnVectorBatch nextCvb() throws InterruptedException, IOException {
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
index fea3dc7..f3f596b 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
@@ -22,10 +22,15 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
import javax.management.ObjectName;
+import org.apache.hadoop.hive.llap.daemon.impl.StatsRecordingThreadPool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
@@ -69,7 +74,7 @@
private static final String MODE_CACHE = "cache", MODE_ALLOCATOR = "allocator";
private final ColumnVectorProducer cvp;
- private final ListeningExecutorService executor;
+ private final ExecutorService executor;
private LlapDaemonCacheMetrics cacheMetrics;
private LlapDaemonIOMetrics ioMetrics;
private ObjectName buddyAllocatorMXBean;
@@ -137,8 +142,10 @@ private LlapIoImpl(Configuration conf) throws IOException {
}
// IO thread pool. Listening is used for unhandled errors for now (TODO: remove?)
int numThreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.LLAP_IO_THREADPOOL_SIZE);
- executor = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(numThreads,
- new ThreadFactoryBuilder().setNameFormat("IO-Elevator-Thread-%d").setDaemon(true).build()));
+ executor = new StatsRecordingThreadPool(numThreads, numThreads,
+ 0L, TimeUnit.MILLISECONDS,
+ new LinkedBlockingQueue<Runnable>(),
+ new ThreadFactoryBuilder().setNameFormat("IO-Elevator-Thread-%d").setDaemon(true).build());
// TODO: this should depends on input format and be in a map, or something.
this.cvp = new OrcColumnVectorProducer(
metadataCache, orcCache, bufferManager, conf, cacheMetrics, ioMetrics);
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
index 69c0647..2164a3a 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
@@ -29,6 +29,7 @@
import org.apache.hadoop.hive.llap.metrics.LlapDaemonIOMetrics;
import org.apache.orc.impl.DataReaderProperties;
import org.apache.orc.impl.OrcIndex;
+import org.apache.tez.common.counters.TezCounters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
@@ -911,4 +912,8 @@ public OrcIndex readRowIndex(StripeInformation stripe,
return orcDataReader.readStripeFooter(stripe);
}
}
+
+ public TezCounters getTezCounters() {
+ return counters.getTezCounters();
+ }
}
diff --git a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTezUtils.java b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTezUtils.java
index 2c3e53c..99f0731 100644
--- a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTezUtils.java
+++ b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTezUtils.java
@@ -14,11 +14,24 @@
package org.apache.hadoop.hive.llap.tezplugins;
+import java.text.NumberFormat;
+
import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.tez.dag.records.TezDAGID;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.dag.records.TezTaskID;
+import org.apache.tez.dag.records.TezVertexID;
+import org.apache.tez.mapreduce.hadoop.MRHelpers;
+import org.apache.tez.mapreduce.hadoop.MRInputHelpers;
+import org.apache.tez.mapreduce.hadoop.MRJobConfig;
import org.apache.tez.mapreduce.input.MRInput;
import org.apache.tez.mapreduce.input.MRInputLegacy;
import org.apache.tez.mapreduce.input.MultiMRInput;
+import com.google.common.base.Joiner;
+
@InterfaceAudience.Private
public class LlapTezUtils {
public static boolean isSourceOfInterest(String inputClassName) {
@@ -26,4 +39,31 @@ public static boolean isSourceOfInterest(String inputClassName) {
return !(inputClassName.equals(MRInputLegacy.class.getName()) || inputClassName.equals(
MultiMRInput.class.getName()) || inputClassName.equals(MRInput.class.getName()));
}
+
+ // FIXME: This is no longer required after TEZ-3290
+ public static String getApplicationIdString(final JobConf job) {
+ int dagIdx = MRInputHelpers.getDagIndex(job);
+ int vertexIdx = MRInputHelpers.getVertexIndex(job);
+ int taskIdx = MRInputHelpers.getTaskIndex(job);
+ int taskAttemptIdx = MRInputHelpers.getTaskAttemptIndex(job);
+ String appId = MRInputHelpers.getApplicationIdString(job);
+ ApplicationId applicationId = createApplicationIdFromString(appId);
+ if (applicationId != null) {
+ TezTaskAttemptID tezTaskAttemptID = TezTaskAttemptID.getInstance(
+ TezTaskID.getInstance(TezVertexID.getInstance(
+ TezDAGID.getInstance(applicationId, dagIdx), vertexIdx), taskIdx), taskAttemptIdx);
+ return tezTaskAttemptID.toString();
+ }
+ return null;
+ }
+
+ private static ApplicationId createApplicationIdFromString(final String appIdStr) {
+ String[] tokens = appIdStr.split("_");
+ if (tokens.length == 3) {
+ long clusterTimestamp = Long.parseLong(tokens[1]);
+ int appId = Integer.parseInt(tokens[2]);
+ return ApplicationId.newInstance(clusterTimestamp, appId);
+ }
+ return null;
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java
index 838f320..d87d301 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java
@@ -36,6 +36,7 @@
import java.util.SortedSet;
import java.util.TreeSet;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.llap.counters.LlapIOCounters;
import org.apache.hadoop.hive.ql.Context;
@@ -47,6 +48,7 @@
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
+import org.apache.tez.common.counters.FileSystemCounter;
import org.apache.tez.common.counters.TaskCounter;
import org.apache.tez.common.counters.TezCounter;
import org.apache.tez.common.counters.TezCounters;
@@ -80,6 +82,7 @@
private static final String QUERY_EXEC_SUMMARY_HEADER = "Query Execution Summary";
private static final String TASK_SUMMARY_HEADER = "Task Execution Summary";
private static final String LLAP_IO_SUMMARY_HEADER = "LLAP IO Summary";
+ private static final String FS_COUNTERS_SUMMARY_HEADER = "FileSystem Counters Summary";
// keep this within 80 chars width. If more columns needs to be added then update min terminal
// width requirement and SEPARATOR width accordingly
@@ -106,6 +109,9 @@
"VERTICES", "ROWGROUPS", "META_HIT", "META_MISS", "DATA_HIT", "DATA_MISS",
"ALLOCATION", "USED", "TOTAL_IO");
+ // FileSystem counters
+ private static final String FS_COUNTERS_HEADER_FORMAT = "%10s %15s %13s %18s %18s %13s";
+
// Methods summary
private static final String OPERATION_SUMMARY = "%-35s %9s";
private static final String OPERATION = "OPERATION";
@@ -391,6 +397,10 @@ public int monitorExecution(final DAGClient dagClient, HiveConf conf,
console.printInfo(LLAP_IO_SUMMARY_HEADER);
printLlapIOSummary(progressMap, console, dagClient);
console.printInfo(SEPARATOR);
+ console.printInfo("");
+
+ console.printInfo(FS_COUNTERS_SUMMARY_HEADER);
+ printFSCountersSummary(progressMap, console, dagClient);
}
console.printInfo("");
@@ -697,6 +707,62 @@ private void printLlapIOSummary(Map progressMap, LogHelper con
}
}
+ private void printFSCountersSummary(Map<String, Progress> progressMap, LogHelper console,
+ DAGClient dagClient) {
+ SortedSet<String> keys = new TreeSet<>(progressMap.keySet());
+ Set<StatusGetOpts> statusOptions = new HashSet<>(1);
+ statusOptions.add(StatusGetOpts.GET_COUNTERS);
+ // Assuming FileSystem.getAllStatistics() returns all schemes that are accessed on task side
+ // as well. If not, we need a way to get all the schemes that are accessed by the tez task/llap.
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ final String scheme = statistics.getScheme().toUpperCase();
+ final String fsCountersHeader = String.format(FS_COUNTERS_HEADER_FORMAT,
+ "VERTICES", "BYTES_READ", "READ_OPS", "LARGE_READ_OPS", "BYTES_WRITTEN", "WRITE_OPS");
+
+ console.printInfo("");
+ reprintLineWithColorAsBold("Scheme: " + scheme, Ansi.Color.RED);
+ console.printInfo(SEPARATOR);
+ reprintLineWithColorAsBold(fsCountersHeader, Ansi.Color.CYAN);
+ console.printInfo(SEPARATOR);
+
+ for (String vertexName : keys) {
+ TezCounters vertexCounters = null;
+ try {
+ vertexCounters = dagClient.getVertexStatus(vertexName, statusOptions)
+ .getVertexCounters();
+ } catch (IOException e) {
+ // best attempt, shouldn't really kill DAG for this
+ } catch (TezException e) {
+ // best attempt, shouldn't really kill DAG for this
+ }
+ if (vertexCounters != null) {
+ final String counterGroup = FileSystemCounter.class.getName();
+ final long bytesRead = getCounterValueByGroupName(vertexCounters,
+ counterGroup, scheme + "_" + FileSystemCounter.BYTES_READ.name());
+ final long bytesWritten = getCounterValueByGroupName(vertexCounters,
+ counterGroup, scheme + "_" + FileSystemCounter.BYTES_WRITTEN.name());
+ final long readOps = getCounterValueByGroupName(vertexCounters,
+ counterGroup, scheme + "_" + FileSystemCounter.READ_OPS.name());
+ final long largeReadOps = getCounterValueByGroupName(vertexCounters,
+ counterGroup, scheme + "_" + FileSystemCounter.LARGE_READ_OPS.name());
+ final long writeOps = getCounterValueByGroupName(vertexCounters,
+ counterGroup, scheme + "_" + FileSystemCounter.WRITE_OPS.name());
+
+ String fsCountersSummary = String.format(FS_COUNTERS_HEADER_FORMAT,
+ vertexName,
+ humanReadableByteCount(bytesRead),
+ readOps,
+ largeReadOps,
+ humanReadableByteCount(bytesWritten),
+ writeOps);
+ console.printInfo(fsCountersSummary);
+ }
+ }
+
+ console.printInfo(SEPARATOR);
+ }
+ }
+
private void printStatusInPlace(Map progressMap, long startTime,
boolean vextexStatusFromAM, DAGClient dagClient) {
StringBuilder reportBuffer = new StringBuilder();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java
index 81bda08..ba59192 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java
@@ -19,6 +19,8 @@
import java.util.List;
+import org.apache.hadoop.hive.llap.counters.LlapIOCounters;
+import org.apache.tez.common.counters.FileSystemCounter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -63,6 +65,23 @@ public void run(HookContext hookContext) throws Exception {
for (TezCounter counter : group) {
console.printError(" " + counter.getDisplayName() + ": " + counter.getValue());
}
+ } else if (group.getDisplayName().equals("File System Counters")) {
+ console.printError(tezTask.getId() + " FILE SYSTEM COUNTERS:");
+ for (TezCounter counter : group) {
+ // HDFS counters should be relatively consistent across test runs when compared to
+ // local file system counters
+ if (counter.getName().contains("HDFS")) {
+ console.printError(" " + counter.getDisplayName() + ": " + counter.getValue());
+ }
+ }
+ } else if (group.getDisplayName().equals(LlapIOCounters.class.getName())) {
+ console.printError(tezTask.getId() + " LLAP IO COUNTERS:");
+ List testSafeCounters = LlapIOCounters.testSafeCounterNames();
+ for (TezCounter counter : group) {
+ if (testSafeCounters.contains(counter.getDisplayName())) {
+ console.printError(" " + counter.getDisplayName() + ": " + counter.getValue());
+ }
+ }
}
}
}
diff --git a/ql/src/test/queries/clientpositive/orc_llap_counters.q b/ql/src/test/queries/clientpositive/orc_llap_counters.q
new file mode 100644
index 0000000..1bd55d3
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/orc_llap_counters.q
@@ -0,0 +1,182 @@
+set hive.mapred.mode=nonstrict;
+SET hive.optimize.index.filter=true;
+SET hive.cbo.enable=false;
+SET hive.vectorized.execution.enabled=true;
+SET hive.llap.io.enabled=true;
+
+CREATE TABLE staging(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging;
+LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging;
+
+CREATE TABLE orc_ppd_staging(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ c char(50),
+ v varchar(50),
+ da date,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*");
+
+insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s;
+
+-- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values
+-- which makes it hard to test bloom filters
+insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1;
+insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1;
+
+CREATE TABLE orc_ppd(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ c char(50),
+ v varchar(50),
+ da date,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*");
+
+insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s;
+
+describe formatted orc_ppd;
+
+SET hive.fetch.task.conversion=none;
+SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter;
+
+-- Row group statistics for column t:
+-- Entry 0: count: 994 hasNull: true min: -10 max: 54 sum: 26014 positions: 0,0,0,0,0,0,0
+-- Entry 1: count: 1000 hasNull: false min: 54 max: 118 sum: 86812 positions: 0,2,124,0,0,116,11
+-- Entry 2: count: 100 hasNull: false min: 118 max: 127 sum: 12151 positions: 0,4,119,0,0,244,19
+
+-- INPUT_RECORDS: 2100 (all row groups)
+select count(*) from orc_ppd;
+
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127;
+
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55;
+select count(*) from orc_ppd where t <=> 50;
+select count(*) from orc_ppd where t <=> 100;
+
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = "54";
+
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = -10.0;
+
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = cast(53 as float);
+select count(*) from orc_ppd where t = cast(53 as double);
+
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t < 100;
+
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t < 100 and t > 98;
+
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t <= 100;
+
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t is null;
+
+-- INPUT_RECORDS: 1100 (2 row groups)
+select count(*) from orc_ppd where t in (5, 120);
+
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t between 60 and 80;
+
+-- bloom filter tests
+-- INPUT_RECORDS: 0
+select count(*) from orc_ppd where t = -100;
+select count(*) from orc_ppd where t <=> -100;
+select count(*) from orc_ppd where t = 125;
+select count(*) from orc_ppd where t IN (-100, 125, 200);
+
+-- Row group statistics for column s:
+-- Entry 0: count: 1000 hasNull: false min: max: zach young sum: 12907 positions: 0,0,0
+-- Entry 1: count: 1000 hasNull: false min: alice allen max: zach zipper sum: 12704 positions: 0,1611,191
+-- Entry 2: count: 100 hasNull: false min: bob davidson max: zzz sum: 1281 positions: 0,3246,373
+
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where s > "zzz";
+
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where s = "zach young";
+select count(*) from orc_ppd where s <=> "zach zipper";
+select count(*) from orc_ppd where s <=> "";
+
+-- INPUT_RECORDS: 0
+select count(*) from orc_ppd where s is null;
+
+-- INPUT_RECORDS: 2100
+select count(*) from orc_ppd where s is not null;
+
+-- INPUT_RECORDS: 0
+select count(*) from orc_ppd where s = cast("zach young" as char(50));
+
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where s = cast("zach young" as char(10));
+select count(*) from orc_ppd where s = cast("zach young" as varchar(10));
+select count(*) from orc_ppd where s = cast("zach young" as varchar(50));
+
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where s < "b";
+
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where s > "alice" and s < "bob";
+
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where s in ("alice allen", "");
+
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where s between "" and "alice allen";
+
+-- INPUT_RECORDS: 100 (1 row group)
+select count(*) from orc_ppd where s between "zz" and "zzz";
+
+-- INPUT_RECORDS: 1100 (2 row groups)
+select count(*) from orc_ppd where s between "zach zipper" and "zzz";
+
+-- bloom filter tests
+-- INPUT_RECORDS: 0
+select count(*) from orc_ppd where s = "hello world";
+select count(*) from orc_ppd where s <=> "apache hive";
+select count(*) from orc_ppd where s IN ("a", "z");
+
+-- INPUT_RECORDS: 100
+select count(*) from orc_ppd where s = "sarah ovid";
+
+-- INPUT_RECORDS: 1100
+select count(*) from orc_ppd where s = "wendy king";
+
+-- INPUT_RECORDS: 1000
+select count(*) from orc_ppd where s = "wendy king" and t < 0;
+
+-- INPUT_RECORDS: 100
+select count(*) from orc_ppd where s = "wendy king" and t > 100;
diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
new file mode 100644
index 0000000..1a57d14
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
@@ -0,0 +1,1261 @@
+PREHOOK: query: CREATE TABLE staging(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@staging
+POSTHOOK: query: CREATE TABLE staging(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@staging
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@staging
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@staging
+PREHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ c char(50),
+ v varchar(50),
+ da date,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_ppd_staging
+POSTHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ c char(50),
+ v varchar(50),
+ da date,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_ppd_staging
+PREHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@orc_ppd_staging
+POSTHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@orc_ppd_staging
+POSTHOOK: Lineage: orc_ppd_staging.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+PREHOOK: query: -- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values
+-- which makes it hard to test bloom filters
+insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@orc_ppd_staging
+POSTHOOK: query: -- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values
+-- which makes it hard to test bloom filters
+insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@orc_ppd_staging
+POSTHOOK: Lineage: orc_ppd_staging.b EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.bin EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.d EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.dec EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.f EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.i EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION []
+PREHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@orc_ppd_staging
+POSTHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@orc_ppd_staging
+POSTHOOK: Lineage: orc_ppd_staging.b SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.bin EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.d SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.dec EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.f EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION []
+PREHOOK: query: CREATE TABLE orc_ppd(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ c char(50),
+ v varchar(50),
+ da date,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_ppd
+POSTHOOK: query: CREATE TABLE orc_ppd(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ c char(50),
+ v varchar(50),
+ da date,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_ppd
+PREHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd_staging
+PREHOOK: Output: default@orc_ppd
+POSTHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_ppd_staging
+POSTHOOK: Output: default@orc_ppd
+POSTHOOK: Lineage: orc_ppd.b SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.bin SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.bo SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.c EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.d SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.da EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.dec SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: orc_ppd.f SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.i SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.s SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.si SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.t SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.ts SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.v EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ]
+PREHOOK: query: describe formatted orc_ppd
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orc_ppd
+POSTHOOK: query: describe formatted orc_ppd
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orc_ppd
+# col_name data_type comment
+
+t tinyint
+si smallint
+i int
+b bigint
+f float
+d double
+bo boolean
+s string
+c char(50)
+v varchar(50)
+da date
+ts timestamp
+dec decimal(4,2)
+bin binary
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ numFiles 1
+ numRows 2100
+ orc.bloom.filter.columns *
+ orc.row.index.stride 1000
+ rawDataSize 1223514
+ totalSize 60010
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Row group statistics for column t:
+-- Entry 0: count: 994 hasNull: true min: -10 max: 54 sum: 26014 positions: 0,0,0,0,0,0,0
+-- Entry 1: count: 1000 hasNull: false min: 54 max: 118 sum: 86812 positions: 0,2,124,0,0,116,11
+-- Entry 2: count: 100 hasNull: false min: 118 max: 127 sum: 12151 positions: 0,4,119,0,0,244,19
+
+-- INPUT_RECORDS: 2100 (all row groups)
+select count(*) from orc_ppd
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 16711
+ HDFS_BYTES_WRITTEN: 104
+ HDFS_READ_OPS: 5
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ METADATA_CACHE_MISS: 2
+ NUM_DECODED_BATCHES: 1
+ NUM_VECTOR_BATCHES: 3
+ ROWS_EMITTED: 2100
+ SELECTED_ROWGROUPS: 3
+2100
+PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ RECORDS_OUT_0: 1
+0
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 638
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ ALLOCATED_BYTES: 524288
+ ALLOCATED_USED_BYTES: 269
+ CACHE_MISS_BYTES: 249
+ METADATA_CACHE_HIT: 1
+ METADATA_CACHE_MISS: 1
+ NUM_DECODED_BATCHES: 1
+ NUM_VECTOR_BATCHES: 1
+ ROWS_EMITTED: 1000
+ SELECTED_ROWGROUPS: 1
+8
+PREHOOK: query: select count(*) from orc_ppd where t <=> 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 102
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 1000
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+22
+PREHOOK: query: select count(*) from orc_ppd where t <=> 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 102
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 1000
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+16
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = "54"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 102
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 249
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 2
+ NUM_VECTOR_BATCHES: 2
+ ROWS_EMITTED: 2000
+ SELECTED_ROWGROUPS: 2
+18
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = -10.0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 249
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 1
+ NUM_VECTOR_BATCHES: 1
+ ROWS_EMITTED: 1000
+ SELECTED_ROWGROUPS: 1
+1
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = cast(53 as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 0
+ HDFS_READ_OPS: 0
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 0
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 249
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 1
+ NUM_VECTOR_BATCHES: 1
+ ROWS_EMITTED: 1000
+ SELECTED_ROWGROUPS: 1
+32
+PREHOOK: query: select count(*) from orc_ppd where t = cast(53 as double)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 102
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 249
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 1
+ NUM_VECTOR_BATCHES: 1
+ ROWS_EMITTED: 1000
+ SELECTED_ROWGROUPS: 1
+32
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t < 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 104
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 249
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 2
+ NUM_VECTOR_BATCHES: 2
+ ROWS_EMITTED: 2000
+ SELECTED_ROWGROUPS: 2
+1697
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t < 100 and t > 98
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 0
+ HDFS_READ_OPS: 0
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 0
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 249
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 1
+ NUM_VECTOR_BATCHES: 1
+ ROWS_EMITTED: 1000
+ SELECTED_ROWGROUPS: 1
+12
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t <= 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 104
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 249
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 2
+ NUM_VECTOR_BATCHES: 2
+ ROWS_EMITTED: 2000
+ SELECTED_ROWGROUPS: 2
+1713
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 249
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 1
+ NUM_VECTOR_BATCHES: 1
+ ROWS_EMITTED: 1000
+ SELECTED_ROWGROUPS: 1
+6
+PREHOOK: query: -- INPUT_RECORDS: 1100 (2 row groups)
+select count(*) from orc_ppd where t in (5, 120)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 0
+ HDFS_READ_OPS: 0
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 0
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 249
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 2
+ NUM_VECTOR_BATCHES: 2
+ ROWS_EMITTED: 1100
+ SELECTED_ROWGROUPS: 2
+50
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t between 60 and 80
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 103
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 249
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 1
+ NUM_VECTOR_BATCHES: 1
+ ROWS_EMITTED: 1000
+ SELECTED_ROWGROUPS: 1
+318
+PREHOOK: query: -- bloom filter tests
+-- INPUT_RECORDS: 0
+select count(*) from orc_ppd where t = -100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ RECORDS_OUT_0: 1
+0
+PREHOOK: query: select count(*) from orc_ppd where t <=> -100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ RECORDS_OUT_0: 1
+0
+PREHOOK: query: select count(*) from orc_ppd where t = 125
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 0
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 249
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 1
+ NUM_VECTOR_BATCHES: 1
+ ROWS_EMITTED: 100
+ SELECTED_ROWGROUPS: 1
+0
+PREHOOK: query: select count(*) from orc_ppd where t IN (-100, 125, 200)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 0
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 249
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 1
+ NUM_VECTOR_BATCHES: 1
+ ROWS_EMITTED: 100
+ SELECTED_ROWGROUPS: 1
+0
+PREHOOK: query: -- Row group statistics for column s:
+-- Entry 0: count: 1000 hasNull: false min: max: zach young sum: 12907 positions: 0,0,0
+-- Entry 1: count: 1000 hasNull: false min: alice allen max: zach zipper sum: 12704 positions: 0,1611,191
+-- Entry 2: count: 100 hasNull: false min: bob davidson max: zzz sum: 1281 positions: 0,3246,373
+
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where s > "zzz"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ RECORDS_OUT_0: 1
+0
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where s = "zach young"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 4402
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ ALLOCATED_BYTES: 786432
+ ALLOCATED_USED_BYTES: 11299
+ CACHE_HIT_BYTES: 0
+ CACHE_MISS_BYTES: 3980
+ METADATA_CACHE_HIT: 1
+ METADATA_CACHE_MISS: 1
+ NUM_DECODED_BATCHES: 3
+ NUM_VECTOR_BATCHES: 3
+ ROWS_EMITTED: 2100
+ SELECTED_ROWGROUPS: 3
+2
+PREHOOK: query: select count(*) from orc_ppd where s <=> "zach zipper"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 1000
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+6
+PREHOOK: query: select count(*) from orc_ppd where s <=> ""
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 22574
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 5
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 1000
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+6
+PREHOOK: query: -- INPUT_RECORDS: 0
+select count(*) from orc_ppd where s is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ RECORDS_OUT_0: 1
+0
+PREHOOK: query: -- INPUT_RECORDS: 2100
+select count(*) from orc_ppd where s is not null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 104
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 3980
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 3
+ NUM_VECTOR_BATCHES: 3
+ ROWS_EMITTED: 2100
+ SELECTED_ROWGROUPS: 3
+2100
+PREHOOK: query: -- INPUT_RECORDS: 0
+select count(*) from orc_ppd where s = cast("zach young" as char(50))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 0
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 3980
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 2
+ NUM_VECTOR_BATCHES: 2
+ ROWS_EMITTED: 1100
+ SELECTED_ROWGROUPS: 2
+0
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where s = cast("zach young" as char(10))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 3980
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 3
+ NUM_VECTOR_BATCHES: 3
+ ROWS_EMITTED: 2100
+ SELECTED_ROWGROUPS: 3
+2
+PREHOOK: query: select count(*) from orc_ppd where s = cast("zach young" as varchar(10))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 0
+ HDFS_READ_OPS: 0
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 0
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 3980
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 3
+ NUM_VECTOR_BATCHES: 3
+ ROWS_EMITTED: 2100
+ SELECTED_ROWGROUPS: 3
+2
+PREHOOK: query: select count(*) from orc_ppd where s = cast("zach young" as varchar(50))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 3980
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 3
+ NUM_VECTOR_BATCHES: 3
+ ROWS_EMITTED: 2100
+ SELECTED_ROWGROUPS: 3
+2
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where s < "b"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 102
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 3980
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 2
+ NUM_VECTOR_BATCHES: 2
+ ROWS_EMITTED: 2000
+ SELECTED_ROWGROUPS: 2
+81
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where s > "alice" and s < "bob"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 0
+ HDFS_READ_OPS: 0
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 0
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 3980
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 2
+ NUM_VECTOR_BATCHES: 2
+ ROWS_EMITTED: 2000
+ SELECTED_ROWGROUPS: 2
+74
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where s in ("alice allen", "")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 102
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 3980
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 2
+ NUM_VECTOR_BATCHES: 2
+ ROWS_EMITTED: 2000
+ SELECTED_ROWGROUPS: 2
+12
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where s between "" and "alice allen"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 0
+ HDFS_READ_OPS: 0
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 0
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 3980
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 2
+ NUM_VECTOR_BATCHES: 2
+ ROWS_EMITTED: 2000
+ SELECTED_ROWGROUPS: 2
+13
+PREHOOK: query: -- INPUT_RECORDS: 100 (1 row group)
+select count(*) from orc_ppd where s between "zz" and "zzz"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 3980
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 1
+ NUM_VECTOR_BATCHES: 1
+ ROWS_EMITTED: 100
+ SELECTED_ROWGROUPS: 1
+1
+PREHOOK: query: -- INPUT_RECORDS: 1100 (2 row groups)
+select count(*) from orc_ppd where s between "zach zipper" and "zzz"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 3980
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 2
+ NUM_VECTOR_BATCHES: 2
+ ROWS_EMITTED: 1100
+ SELECTED_ROWGROUPS: 2
+7
+PREHOOK: query: -- bloom filter tests
+-- INPUT_RECORDS: 0
+select count(*) from orc_ppd where s = "hello world"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 0
+ HDFS_READ_OPS: 0
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 0
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 0
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 3980
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 3
+ NUM_VECTOR_BATCHES: 3
+ ROWS_EMITTED: 2100
+ SELECTED_ROWGROUPS: 3
+0
+PREHOOK: query: select count(*) from orc_ppd where s <=> "apache hive"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 18594
+ HDFS_BYTES_WRITTEN: 0
+ HDFS_READ_OPS: 3
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 0
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+0
+PREHOOK: query: select count(*) from orc_ppd where s IN ("a", "z")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 0
+ HDFS_READ_OPS: 0
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 0
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 0
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 3980
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 3
+ NUM_VECTOR_BATCHES: 3
+ ROWS_EMITTED: 2100
+ SELECTED_ROWGROUPS: 3
+0
+PREHOOK: query: -- INPUT_RECORDS: 100
+select count(*) from orc_ppd where s = "sarah ovid"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 3980
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 3
+ NUM_VECTOR_BATCHES: 3
+ ROWS_EMITTED: 2100
+ SELECTED_ROWGROUPS: 3
+2
+PREHOOK: query: -- INPUT_RECORDS: 1100
+select count(*) from orc_ppd where s = "wendy king"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 3980
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 3
+ NUM_VECTOR_BATCHES: 3
+ ROWS_EMITTED: 2100
+ SELECTED_ROWGROUPS: 3
+6
+PREHOOK: query: -- INPUT_RECORDS: 1000
+select count(*) from orc_ppd where s = "wendy king" and t < 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 0
+ HDFS_READ_OPS: 0
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 0
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 4229
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 1
+ NUM_VECTOR_BATCHES: 1
+ ROWS_EMITTED: 1000
+ SELECTED_ROWGROUPS: 1
+2
+PREHOOK: query: -- INPUT_RECORDS: 100
+select count(*) from orc_ppd where s = "wendy king" and t > 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 0
+ HDFS_READ_OPS: 0
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 0
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 0
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 4229
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 2
+ NUM_VECTOR_BATCHES: 2
+ ROWS_EMITTED: 1100
+ SELECTED_ROWGROUPS: 2
+2