diff --git a/itests/hive-unit/pom.xml b/itests/hive-unit/pom.xml index 1a07651..b241daa 100644 --- a/itests/hive-unit/pom.xml +++ b/itests/hive-unit/pom.xml @@ -359,6 +359,12 @@ true + org.apache.hadoop + hadoop-yarn-api + ${hadoop.version} + test + + org.apache.curator curator-test ${curator.version} diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index c4ba277..91d6cd6 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -493,6 +493,7 @@ minillap.shared.query.files=bucket_map_join_tez1.q,\ llap_nullscan.q,\ mrr.q,\ orc_ppd_basic.q,\ + orc_llap_counters.q,\ tez_bmj_schema_evolution.q,\ tez_dml.q,\ tez_fsstat.q,\ diff --git a/llap-common/src/java/org/apache/hadoop/hive/llap/LlapUtil.java b/llap-common/src/java/org/apache/hadoop/hive/llap/LlapUtil.java index 9dcacea..61d35d8 100644 --- a/llap-common/src/java/org/apache/hadoop/hive/llap/LlapUtil.java +++ b/llap-common/src/java/org/apache/hadoop/hive/llap/LlapUtil.java @@ -14,9 +14,12 @@ package org.apache.hadoop.hive.llap; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.regex.Pattern; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.security.SecurityUtil; @@ -49,4 +52,79 @@ public static String generateClusterName(Configuration conf) { String hosts = HiveConf.getTrimmedVar(conf, ConfVars.LLAP_DAEMON_SERVICE_HOSTS); return hostsRe.matcher(hosts.startsWith("@") ? hosts.substring(1) : hosts).replaceAll("_"); } + + public static StatisticsData getStatisticsForScheme(final String scheme, + final List stats) { + if (stats != null && scheme != null) { + for (StatisticsData s : stats) { + if (s.getScheme().equalsIgnoreCase(scheme)) { + return s; + } + } + } + return null; + } + + public static List cloneThreadLocalFileSystemStatistics() { + List result = new ArrayList<>(); + // thread local filesystem stats is private and cannot be clone. So make a copy to new class + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + result.add(new StatisticsData(statistics.getScheme(), statistics.getThreadStatistics())); + } + return result; + } + + public static class StatisticsData { + long bytesRead; + long bytesWritten; + int readOps; + int largeReadOps; + int writeOps; + String scheme; + + public StatisticsData(String scheme, FileSystem.Statistics.StatisticsData fsStats) { + this.scheme = scheme; + this.bytesRead = fsStats.getBytesRead(); + this.bytesWritten = fsStats.getBytesWritten(); + this.readOps = fsStats.getReadOps(); + this.largeReadOps = fsStats.getLargeReadOps(); + this.writeOps = fsStats.getWriteOps(); + } + + public long getBytesRead() { + return bytesRead; + } + + public long getBytesWritten() { + return bytesWritten; + } + + public int getReadOps() { + return readOps; + } + + public int getLargeReadOps() { + return largeReadOps; + } + + public int getWriteOps() { + return writeOps; + } + + public String getScheme() { + return scheme; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(" scheme: ").append(scheme); + sb.append(" bytesRead: ").append(bytesRead); + sb.append(" bytesWritten: ").append(bytesWritten); + sb.append(" readOps: ").append(readOps); + sb.append(" largeReadOps: ").append(largeReadOps); + sb.append(" writeOps: ").append(writeOps); + return sb.toString(); + } + } } diff --git a/llap-common/src/java/org/apache/hadoop/hive/llap/counters/LlapIOCounters.java b/llap-common/src/java/org/apache/hadoop/hive/llap/counters/LlapIOCounters.java index 365ddab..1ed23ba 100644 --- a/llap-common/src/java/org/apache/hadoop/hive/llap/counters/LlapIOCounters.java +++ b/llap-common/src/java/org/apache/hadoop/hive/llap/counters/LlapIOCounters.java @@ -15,23 +15,43 @@ */ package org.apache.hadoop.hive.llap.counters; +import java.util.ArrayList; +import java.util.List; + /** * LLAP IO related counters. */ public enum LlapIOCounters { - NUM_VECTOR_BATCHES, - NUM_DECODED_BATCHES, - SELECTED_ROWGROUPS, - NUM_ERRORS, - ROWS_EMITTED, - METADATA_CACHE_HIT, - METADATA_CACHE_MISS, - CACHE_HIT_BYTES, - CACHE_MISS_BYTES, - ALLOCATED_BYTES, - ALLOCATED_USED_BYTES, - TOTAL_IO_TIME_NS, - DECODE_TIME_NS, - HDFS_TIME_NS, - CONSUMER_TIME_NS + NUM_VECTOR_BATCHES(true), + NUM_DECODED_BATCHES(true), + SELECTED_ROWGROUPS(true), + NUM_ERRORS(true), + ROWS_EMITTED(true), + METADATA_CACHE_HIT(true), + METADATA_CACHE_MISS(true), + CACHE_HIT_BYTES(true), + CACHE_MISS_BYTES(true), + ALLOCATED_BYTES(true), + ALLOCATED_USED_BYTES(true), + TOTAL_IO_TIME_NS(false), + DECODE_TIME_NS(false), + HDFS_TIME_NS(false), + CONSUMER_TIME_NS(false); + + // flag to indicate if these counters are subject to change across different test runs + private boolean testSafe; + + LlapIOCounters(final boolean testSafe) { + this.testSafe = testSafe; + } + + public static List testSafeCounterNames() { + List testSafeCounters = new ArrayList<>(); + for (LlapIOCounters counter : values()) { + if (counter.testSafe) { + testSafeCounters.add(counter.name()); + } + } + return testSafeCounters; + } } diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/counters/QueryFragmentCounters.java b/llap-server/src/java/org/apache/hadoop/hive/llap/counters/QueryFragmentCounters.java index a53ac61..0c858eb 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/counters/QueryFragmentCounters.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/counters/QueryFragmentCounters.java @@ -135,4 +135,8 @@ public String toString() { sb.append(" ]"); return sb.toString(); } + + public TezCounters getTezCounters() { + return tezCounters; + } } diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/StatsRecordingThreadPool.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/StatsRecordingThreadPool.java new file mode 100644 index 0000000..4fb1e11 --- /dev/null +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/StatsRecordingThreadPool.java @@ -0,0 +1,174 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.llap.daemon.impl; + +import java.util.List; +import java.util.Map; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.FutureTask; +import java.util.concurrent.RunnableFuture; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hive.llap.LlapUtil; +import org.apache.hadoop.hive.llap.counters.LlapIOCounters; +import org.apache.hadoop.hive.llap.io.encoded.OrcEncodedDataReader; +import org.apache.tez.common.counters.FileSystemCounter; +import org.apache.tez.common.counters.TezCounters; +import org.apache.tez.runtime.task.TaskRunner2Callable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Custom thread pool implementation that records per thread file system statistics in TezCounters. + */ +public class StatsRecordingThreadPool extends ThreadPoolExecutor { + private static final Logger LOG = LoggerFactory.getLogger(StatsRecordingThreadPool.class); + // map that stores snapshot of FileSystem's thread local stats object before thread execution + private Map> threadsStatsBefore; + // uncaught exception handler that will be set for all threads before execution + private Thread.UncaughtExceptionHandler uncaughtExceptionHandler; + + public StatsRecordingThreadPool(final int corePoolSize, final int maximumPoolSize, + final long keepAliveTime, + final TimeUnit unit, + final BlockingQueue workQueue, + final ThreadFactory threadFactory) { + this(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue, threadFactory, null); + } + + public StatsRecordingThreadPool(final int corePoolSize, final int maximumPoolSize, + final long keepAliveTime, + final TimeUnit unit, + final BlockingQueue workQueue, + final ThreadFactory threadFactory, Thread.UncaughtExceptionHandler handler) { + super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue, threadFactory); + this.threadsStatsBefore = new ConcurrentHashMap<>(corePoolSize); + this.uncaughtExceptionHandler = handler; + } + + @Override + protected RunnableFuture newTaskFor(final Callable callable) { + return new WrappedFutureTask<>(callable); + } + + @Override + protected void beforeExecute(final Thread t, final Runnable r) { + // setup uncaught exception handler for thread + if (uncaughtExceptionHandler != null) { + t.setUncaughtExceptionHandler(uncaughtExceptionHandler); + } + // clone thread local file system statistics + threadsStatsBefore.put(t.getId(), LlapUtil.cloneThreadLocalFileSystemStatistics()); + super.beforeExecute(t, r); + } + + @Override + protected void afterExecute(final Runnable r, final Throwable t) { + Thread thread = Thread.currentThread(); + if (r instanceof WrappedFutureTask) { + Callable wrappedCallable = ((WrappedFutureTask) r).getWrappedCallable(); + TezCounters tezCounters = null; + // add tez counters for task execution and llap io + if (wrappedCallable instanceof TaskRunner2Callable) { + TaskRunner2Callable taskRunner2Callable = (TaskRunner2Callable) wrappedCallable; + // counters for task execution side + tezCounters = taskRunner2Callable.addAndGetTezCounter(LlapIOCounters.class.getName()); + } else if (wrappedCallable instanceof OrcEncodedDataReader) { + // counters for llap io side + tezCounters = ((OrcEncodedDataReader) wrappedCallable).getTezCounters(); + } + + if (tezCounters != null) { + List statsBefore = threadsStatsBefore.get(thread.getId()); + if (statsBefore != null) { + List allStatistics = FileSystem.getAllStatistics(); + for (FileSystem.Statistics statistics : allStatistics) { + FileSystem.Statistics.StatisticsData threadFSStats = statistics.getThreadStatistics(); + LlapUtil.StatisticsData sb = LlapUtil + .getStatisticsForScheme(statistics.getScheme(), statsBefore); + final long bytesReadDelta; + final long bytesWrittenDelta; + final long readOpsDelta; + final long largeReadOpsDelta; + final long writeOpsDelta; + // there could be more scheme after execution as execution might be accessing a + // different filesystem. So if we don't find a matching scheme before execution we just + // use the after execution values directly without computing delta difference + if (sb != null) { + bytesReadDelta = threadFSStats.getBytesRead() - sb.getBytesRead(); + bytesWrittenDelta = threadFSStats.getBytesWritten() - sb.getBytesWritten(); + readOpsDelta = threadFSStats.getReadOps() - sb.getReadOps(); + largeReadOpsDelta = threadFSStats.getLargeReadOps() - sb.getLargeReadOps(); + writeOpsDelta = threadFSStats.getWriteOps() - sb.getWriteOps(); + } else { + bytesReadDelta = threadFSStats.getBytesRead(); + bytesWrittenDelta = threadFSStats.getBytesWritten(); + readOpsDelta = threadFSStats.getReadOps(); + largeReadOpsDelta = threadFSStats.getLargeReadOps(); + writeOpsDelta = threadFSStats.getWriteOps(); + } + tezCounters.findCounter(statistics.getScheme(), FileSystemCounter.BYTES_READ) + .increment(bytesReadDelta); + tezCounters.findCounter(statistics.getScheme(), FileSystemCounter.BYTES_WRITTEN) + .increment(bytesWrittenDelta); + tezCounters.findCounter(statistics.getScheme(), FileSystemCounter.READ_OPS) + .increment(readOpsDelta); + tezCounters.findCounter(statistics.getScheme(), FileSystemCounter.LARGE_READ_OPS) + .increment(largeReadOpsDelta); + tezCounters.findCounter(statistics.getScheme(), FileSystemCounter.WRITE_OPS) + .increment(writeOpsDelta); + } + } + } else { + LOG.warn("TezCounters is null for callable type: {}", + wrappedCallable.getClass().getSimpleName()); + } + } + + // remove the stored snapshot of file system statistics + threadsStatsBefore.remove(thread.getId()); + super.afterExecute(r, t); + } + + public void setUncaughtExceptionHandler(Thread.UncaughtExceptionHandler handler) { + this.uncaughtExceptionHandler = handler; + } + + /** + * Wrapped future task that provides access to the callable that it is wrapping + * + * @param + */ + private static class WrappedFutureTask extends FutureTask { + private Callable internalCallable; + + WrappedFutureTask(final Callable callable) { + super(callable); + this.internalCallable = callable; + } + + Callable getWrappedCallable() { + return internalCallable; + } + } +} diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java index 0d9882b..394c67f 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java @@ -23,7 +23,9 @@ import java.util.HashMap; import java.util.Map; import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; @@ -46,6 +48,7 @@ import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.tez.common.CallableWithNdc; import org.apache.tez.common.TezCommonUtils; import org.apache.tez.common.security.JobTokenIdentifier; @@ -99,7 +102,7 @@ private final FragmentCompletionHandler fragmentCompletionHanler; private volatile TezTaskRunner2 taskRunner; private volatile TaskReporterInterface taskReporter; - private volatile ListeningExecutorService executor; + private volatile ExecutorService executor; private LlapTaskUmbilicalProtocol umbilical; private volatile long startTime; private volatile String threadName; @@ -179,12 +182,14 @@ protected TaskRunner2Result callInternal() throws Exception { } // TODO This executor seems unnecessary. Here and TezChild - ExecutorService executorReal = Executors.newFixedThreadPool(1, + ThreadPoolExecutor customExecutor = new StatsRecordingThreadPool(1, 1, + 0L, TimeUnit.MILLISECONDS, + new LinkedBlockingQueue(), new ThreadFactoryBuilder() .setDaemon(true) .setNameFormat("TezTaskRunner") .build()); - executor = MoreExecutors.listeningDecorator(executorReal); + executor = customExecutor; // TODO Consolidate this code with TezChild. runtimeWatch.start(); @@ -210,12 +215,7 @@ public LlapTaskUmbilicalProtocol run() throws Exception { } }); - TezTaskAttemptID taskAttemptID = taskSpec.getTaskAttemptID(); - TezTaskID taskId = taskAttemptID.getTaskID(); - TezVertexID tezVertexID = taskId.getVertexID(); - TezDAGID tezDAGID = tezVertexID.getDAGId(); - String fragFullId = Joiner.on('_').join(tezDAGID.getId(), tezVertexID.getId(), taskId.getId(), - taskAttemptID.getId()); + String fragFullId = taskSpec.getTaskAttemptID().toString(); taskReporter = new LlapTaskReporter( umbilical, confParams.amHeartbeatIntervalMsMax, diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java index 298f788..2a785ae 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java @@ -22,16 +22,18 @@ import java.io.IOException; import java.util.LinkedList; import java.util.List; +import java.util.concurrent.ExecutorService; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.llap.ConsumerFeedback; -import org.apache.hadoop.hive.llap.DebugUtils; import org.apache.hadoop.hive.llap.counters.FragmentCountersMap; import org.apache.hadoop.hive.llap.counters.LlapIOCounters; import org.apache.hadoop.hive.llap.counters.QueryFragmentCounters; +import org.apache.hadoop.hive.llap.daemon.impl.StatsRecordingThreadPool; import org.apache.hadoop.hive.llap.io.decode.ColumnVectorProducer; import org.apache.hadoop.hive.llap.io.decode.ReadPipeline; +import org.apache.hadoop.hive.llap.tezplugins.LlapTezUtils; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -58,11 +60,6 @@ import org.slf4j.LoggerFactory; import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.util.concurrent.FutureCallback; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.ListeningExecutorService; public class LlapInputFormat implements InputFormat, VectorizedInputFormatInterface, SelfDescribingInputFormatInterface, @@ -71,12 +68,12 @@ private final InputFormat sourceInputFormat; private final AvoidSplitCombination sourceASC; private final ColumnVectorProducer cvp; - private final ListeningExecutorService executor; + private final ExecutorService executor; private final String hostName; @SuppressWarnings("rawtypes") LlapInputFormat(InputFormat sourceInputFormat, ColumnVectorProducer cvp, - ListeningExecutorService executor) { + ExecutorService executor) { // TODO: right now, we do nothing with source input format, ORC-only in the first cut. // We'd need to plumb it thru and use it to get data to cache/etc. assert sourceInputFormat instanceof OrcInputFormat; @@ -153,19 +150,13 @@ public LlapRecordReader( this.columnIds = includedCols; this.sarg = ConvertAstToSearchArg.createFromConf(job); this.columnNames = ColumnProjectionUtils.getReadColumnNames(job); - String dagId = job.get("tez.mapreduce.dag.index"); - String vertexId = job.get("tez.mapreduce.vertex.index"); - String taskId = job.get("tez.mapreduce.task.index"); - String taskAttemptId = job.get("tez.mapreduce.task.attempt.index"); + String appId = LlapTezUtils.getApplicationIdString(job); TezCounters taskCounters = null; - if (dagId != null && vertexId != null && taskId != null && taskAttemptId != null) { - String fullId = Joiner.on('_').join(dagId, vertexId, taskId, taskAttemptId); - taskCounters = FragmentCountersMap.getCountersForFragment(fullId); - LOG.info("Received dagid_vertexid_taskid_attempid: {}", fullId); + if (appId != null) { + taskCounters = FragmentCountersMap.getCountersForFragment(appId); + LOG.info("Received full application id: {}", appId); } else { - LOG.warn("Not using tez counters as some identifier is null." + - " dagId: {} vertexId: {} taskId: {} taskAttempId: {}", - dagId, vertexId, taskId, taskAttemptId); + LOG.warn("Not using tez counters as application id string is null"); } this.counters = new QueryFragmentCounters(job, taskCounters); this.counters.setDesc(QueryFragmentCounters.Desc.MACHINE, hostName); @@ -233,17 +224,12 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti } - private final class UncaughtErrorHandler implements FutureCallback { + private final class IOUncaughtExceptionHandler implements Thread.UncaughtExceptionHandler { @Override - public void onSuccess(Void result) { - // Successful execution of reader is supposed to call setDone. - } - - @Override - public void onFailure(Throwable t) { - // Reader is not supposed to throw AFTER calling setError. - LlapIoImpl.LOG.error("Unhandled error from reader thread " + t.getMessage()); - setError(t); + public void uncaughtException(final Thread t, final Throwable e) { + LlapIoImpl.LOG.error("Unhandled error from reader thread. threadName: {} threadId: {}" + + " Message: {}", t.getName(), t.getId(), e.getMessage()); + setError(e); } } @@ -252,9 +238,12 @@ private void startRead() { ReadPipeline rp = cvp.createReadPipeline( this, split, columnIds, sarg, columnNames, counters); feedback = rp; - ListenableFuture future = executor.submit(rp.getReadCallable()); - // TODO: we should NOT do this thing with handler. Reader needs to do cleanup in most cases. - Futures.addCallback(future, new UncaughtErrorHandler()); + if (executor instanceof StatsRecordingThreadPool) { + // Every thread created by this thread pool will use the same handler + ((StatsRecordingThreadPool) executor) + .setUncaughtExceptionHandler(new IOUncaughtExceptionHandler()); + } + executor.submit(rp.getReadCallable()); } ColumnVectorBatch nextCvb() throws InterruptedException, IOException { diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java index fea3dc7..f3f596b 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java @@ -22,10 +22,15 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; import javax.management.ObjectName; +import org.apache.hadoop.hive.llap.daemon.impl.StatsRecordingThreadPool; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -69,7 +74,7 @@ private static final String MODE_CACHE = "cache", MODE_ALLOCATOR = "allocator"; private final ColumnVectorProducer cvp; - private final ListeningExecutorService executor; + private final ExecutorService executor; private LlapDaemonCacheMetrics cacheMetrics; private LlapDaemonIOMetrics ioMetrics; private ObjectName buddyAllocatorMXBean; @@ -137,8 +142,10 @@ private LlapIoImpl(Configuration conf) throws IOException { } // IO thread pool. Listening is used for unhandled errors for now (TODO: remove?) int numThreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.LLAP_IO_THREADPOOL_SIZE); - executor = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(numThreads, - new ThreadFactoryBuilder().setNameFormat("IO-Elevator-Thread-%d").setDaemon(true).build())); + executor = new StatsRecordingThreadPool(numThreads, numThreads, + 0L, TimeUnit.MILLISECONDS, + new LinkedBlockingQueue(), + new ThreadFactoryBuilder().setNameFormat("IO-Elevator-Thread-%d").setDaemon(true).build()); // TODO: this should depends on input format and be in a map, or something. this.cvp = new OrcColumnVectorProducer( metadataCache, orcCache, bufferManager, conf, cacheMetrics, ioMetrics); diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java index 69c0647..2164a3a 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.llap.metrics.LlapDaemonIOMetrics; import org.apache.orc.impl.DataReaderProperties; import org.apache.orc.impl.OrcIndex; +import org.apache.tez.common.counters.TezCounters; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -911,4 +912,8 @@ public OrcIndex readRowIndex(StripeInformation stripe, return orcDataReader.readStripeFooter(stripe); } } + + public TezCounters getTezCounters() { + return counters.getTezCounters(); + } } diff --git a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTezUtils.java b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTezUtils.java index 2c3e53c..99f0731 100644 --- a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTezUtils.java +++ b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTezUtils.java @@ -14,11 +14,24 @@ package org.apache.hadoop.hive.llap.tezplugins; +import java.text.NumberFormat; + import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.tez.dag.records.TezDAGID; +import org.apache.tez.dag.records.TezTaskAttemptID; +import org.apache.tez.dag.records.TezTaskID; +import org.apache.tez.dag.records.TezVertexID; +import org.apache.tez.mapreduce.hadoop.MRHelpers; +import org.apache.tez.mapreduce.hadoop.MRInputHelpers; +import org.apache.tez.mapreduce.hadoop.MRJobConfig; import org.apache.tez.mapreduce.input.MRInput; import org.apache.tez.mapreduce.input.MRInputLegacy; import org.apache.tez.mapreduce.input.MultiMRInput; +import com.google.common.base.Joiner; + @InterfaceAudience.Private public class LlapTezUtils { public static boolean isSourceOfInterest(String inputClassName) { @@ -26,4 +39,31 @@ public static boolean isSourceOfInterest(String inputClassName) { return !(inputClassName.equals(MRInputLegacy.class.getName()) || inputClassName.equals( MultiMRInput.class.getName()) || inputClassName.equals(MRInput.class.getName())); } + + // FIXME: This is no longer required after TEZ-3290 + public static String getApplicationIdString(final JobConf job) { + int dagIdx = MRInputHelpers.getDagIndex(job); + int vertexIdx = MRInputHelpers.getVertexIndex(job); + int taskIdx = MRInputHelpers.getTaskIndex(job); + int taskAttemptIdx = MRInputHelpers.getTaskAttemptIndex(job); + String appId = MRInputHelpers.getApplicationIdString(job); + ApplicationId applicationId = createApplicationIdFromString(appId); + if (applicationId != null) { + TezTaskAttemptID tezTaskAttemptID = TezTaskAttemptID.getInstance( + TezTaskID.getInstance(TezVertexID.getInstance( + TezDAGID.getInstance(applicationId, dagIdx), vertexIdx), taskIdx), taskAttemptIdx); + return tezTaskAttemptID.toString(); + } + return null; + } + + private static ApplicationId createApplicationIdFromString(final String appIdStr) { + String[] tokens = appIdStr.split("_"); + if (tokens.length == 3) { + long clusterTimestamp = Long.parseLong(tokens[1]); + int appId = Integer.parseInt(tokens[2]); + return ApplicationId.newInstance(clusterTimestamp, appId); + } + return null; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java index 838f320..d87d301 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java @@ -36,6 +36,7 @@ import java.util.SortedSet; import java.util.TreeSet; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.llap.counters.LlapIOCounters; import org.apache.hadoop.hive.ql.Context; @@ -47,6 +48,7 @@ import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; +import org.apache.tez.common.counters.FileSystemCounter; import org.apache.tez.common.counters.TaskCounter; import org.apache.tez.common.counters.TezCounter; import org.apache.tez.common.counters.TezCounters; @@ -80,6 +82,7 @@ private static final String QUERY_EXEC_SUMMARY_HEADER = "Query Execution Summary"; private static final String TASK_SUMMARY_HEADER = "Task Execution Summary"; private static final String LLAP_IO_SUMMARY_HEADER = "LLAP IO Summary"; + private static final String FS_COUNTERS_SUMMARY_HEADER = "FileSystem Counters Summary"; // keep this within 80 chars width. If more columns needs to be added then update min terminal // width requirement and SEPARATOR width accordingly @@ -106,6 +109,9 @@ "VERTICES", "ROWGROUPS", "META_HIT", "META_MISS", "DATA_HIT", "DATA_MISS", "ALLOCATION", "USED", "TOTAL_IO"); + // FileSystem counters + private static final String FS_COUNTERS_HEADER_FORMAT = "%10s %15s %13s %18s %18s %13s"; + // Methods summary private static final String OPERATION_SUMMARY = "%-35s %9s"; private static final String OPERATION = "OPERATION"; @@ -391,6 +397,10 @@ public int monitorExecution(final DAGClient dagClient, HiveConf conf, console.printInfo(LLAP_IO_SUMMARY_HEADER); printLlapIOSummary(progressMap, console, dagClient); console.printInfo(SEPARATOR); + console.printInfo(""); + + console.printInfo(FS_COUNTERS_SUMMARY_HEADER); + printFSCountersSummary(progressMap, console, dagClient); } console.printInfo(""); @@ -697,6 +707,62 @@ private void printLlapIOSummary(Map progressMap, LogHelper con } } + private void printFSCountersSummary(Map progressMap, LogHelper console, + DAGClient dagClient) { + SortedSet keys = new TreeSet<>(progressMap.keySet()); + Set statusOptions = new HashSet<>(1); + statusOptions.add(StatusGetOpts.GET_COUNTERS); + // Assuming FileSystem.getAllStatistics() returns all schemes that are accessed on task side + // as well. If not, we need a way to get all the schemes that are accessed by the tez task/llap. + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + final String scheme = statistics.getScheme().toUpperCase(); + final String fsCountersHeader = String.format(FS_COUNTERS_HEADER_FORMAT, + "VERTICES", "BYTES_READ", "READ_OPS", "LARGE_READ_OPS", "BYTES_WRITTEN", "WRITE_OPS"); + + console.printInfo(""); + reprintLineWithColorAsBold("Scheme: " + scheme, Ansi.Color.RED); + console.printInfo(SEPARATOR); + reprintLineWithColorAsBold(fsCountersHeader, Ansi.Color.CYAN); + console.printInfo(SEPARATOR); + + for (String vertexName : keys) { + TezCounters vertexCounters = null; + try { + vertexCounters = dagClient.getVertexStatus(vertexName, statusOptions) + .getVertexCounters(); + } catch (IOException e) { + // best attempt, shouldn't really kill DAG for this + } catch (TezException e) { + // best attempt, shouldn't really kill DAG for this + } + if (vertexCounters != null) { + final String counterGroup = FileSystemCounter.class.getName(); + final long bytesRead = getCounterValueByGroupName(vertexCounters, + counterGroup, scheme + "_" + FileSystemCounter.BYTES_READ.name()); + final long bytesWritten = getCounterValueByGroupName(vertexCounters, + counterGroup, scheme + "_" + FileSystemCounter.BYTES_WRITTEN.name()); + final long readOps = getCounterValueByGroupName(vertexCounters, + counterGroup, scheme + "_" + FileSystemCounter.READ_OPS.name()); + final long largeReadOps = getCounterValueByGroupName(vertexCounters, + counterGroup, scheme + "_" + FileSystemCounter.LARGE_READ_OPS.name()); + final long writeOps = getCounterValueByGroupName(vertexCounters, + counterGroup, scheme + "_" + FileSystemCounter.WRITE_OPS.name()); + + String fsCountersSummary = String.format(FS_COUNTERS_HEADER_FORMAT, + vertexName, + humanReadableByteCount(bytesRead), + readOps, + largeReadOps, + humanReadableByteCount(bytesWritten), + writeOps); + console.printInfo(fsCountersSummary); + } + } + + console.printInfo(SEPARATOR); + } + } + private void printStatusInPlace(Map progressMap, long startTime, boolean vextexStatusFromAM, DAGClient dagClient) { StringBuilder reportBuffer = new StringBuilder(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java index 81bda08..ba59192 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java @@ -19,6 +19,8 @@ import java.util.List; +import org.apache.hadoop.hive.llap.counters.LlapIOCounters; +import org.apache.tez.common.counters.FileSystemCounter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf; @@ -63,6 +65,23 @@ public void run(HookContext hookContext) throws Exception { for (TezCounter counter : group) { console.printError(" " + counter.getDisplayName() + ": " + counter.getValue()); } + } else if (group.getDisplayName().equals("File System Counters")) { + console.printError(tezTask.getId() + " FILE SYSTEM COUNTERS:"); + for (TezCounter counter : group) { + // HDFS counters should be relatively consistent across test runs when compared to + // local file system counters + if (counter.getName().contains("HDFS")) { + console.printError(" " + counter.getDisplayName() + ": " + counter.getValue()); + } + } + } else if (group.getDisplayName().equals(LlapIOCounters.class.getName())) { + console.printError(tezTask.getId() + " LLAP IO COUNTERS:"); + List testSafeCounters = LlapIOCounters.testSafeCounterNames(); + for (TezCounter counter : group) { + if (testSafeCounters.contains(counter.getDisplayName())) { + console.printError(" " + counter.getDisplayName() + ": " + counter.getValue()); + } + } } } } diff --git a/ql/src/test/queries/clientpositive/orc_llap_counters.q b/ql/src/test/queries/clientpositive/orc_llap_counters.q new file mode 100644 index 0000000..1bd55d3 --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_llap_counters.q @@ -0,0 +1,182 @@ +set hive.mapred.mode=nonstrict; +SET hive.optimize.index.filter=true; +SET hive.cbo.enable=false; +SET hive.vectorized.execution.enabled=true; +SET hive.llap.io.enabled=true; + +CREATE TABLE staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging; +LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging; + +CREATE TABLE orc_ppd_staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); + +insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s; + +-- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values +-- which makes it hard to test bloom filters +insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1; +insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1; + +CREATE TABLE orc_ppd(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); + +insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s; + +describe formatted orc_ppd; + +SET hive.fetch.task.conversion=none; +SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter; + +-- Row group statistics for column t: +-- Entry 0: count: 994 hasNull: true min: -10 max: 54 sum: 26014 positions: 0,0,0,0,0,0,0 +-- Entry 1: count: 1000 hasNull: false min: 54 max: 118 sum: 86812 positions: 0,2,124,0,0,116,11 +-- Entry 2: count: 100 hasNull: false min: 118 max: 127 sum: 12151 positions: 0,4,119,0,0,244,19 + +-- INPUT_RECORDS: 2100 (all row groups) +select count(*) from orc_ppd; + +-- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127; + +-- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55; +select count(*) from orc_ppd where t <=> 50; +select count(*) from orc_ppd where t <=> 100; + +-- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = "54"; + +-- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = -10.0; + +-- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = cast(53 as float); +select count(*) from orc_ppd where t = cast(53 as double); + +-- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t < 100; + +-- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t < 100 and t > 98; + +-- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t <= 100; + +-- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t is null; + +-- INPUT_RECORDS: 1100 (2 row groups) +select count(*) from orc_ppd where t in (5, 120); + +-- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t between 60 and 80; + +-- bloom filter tests +-- INPUT_RECORDS: 0 +select count(*) from orc_ppd where t = -100; +select count(*) from orc_ppd where t <=> -100; +select count(*) from orc_ppd where t = 125; +select count(*) from orc_ppd where t IN (-100, 125, 200); + +-- Row group statistics for column s: +-- Entry 0: count: 1000 hasNull: false min: max: zach young sum: 12907 positions: 0,0,0 +-- Entry 1: count: 1000 hasNull: false min: alice allen max: zach zipper sum: 12704 positions: 0,1611,191 +-- Entry 2: count: 100 hasNull: false min: bob davidson max: zzz sum: 1281 positions: 0,3246,373 + +-- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where s > "zzz"; + +-- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where s = "zach young"; +select count(*) from orc_ppd where s <=> "zach zipper"; +select count(*) from orc_ppd where s <=> ""; + +-- INPUT_RECORDS: 0 +select count(*) from orc_ppd where s is null; + +-- INPUT_RECORDS: 2100 +select count(*) from orc_ppd where s is not null; + +-- INPUT_RECORDS: 0 +select count(*) from orc_ppd where s = cast("zach young" as char(50)); + +-- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where s = cast("zach young" as char(10)); +select count(*) from orc_ppd where s = cast("zach young" as varchar(10)); +select count(*) from orc_ppd where s = cast("zach young" as varchar(50)); + +-- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where s < "b"; + +-- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where s > "alice" and s < "bob"; + +-- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where s in ("alice allen", ""); + +-- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where s between "" and "alice allen"; + +-- INPUT_RECORDS: 100 (1 row group) +select count(*) from orc_ppd where s between "zz" and "zzz"; + +-- INPUT_RECORDS: 1100 (2 row groups) +select count(*) from orc_ppd where s between "zach zipper" and "zzz"; + +-- bloom filter tests +-- INPUT_RECORDS: 0 +select count(*) from orc_ppd where s = "hello world"; +select count(*) from orc_ppd where s <=> "apache hive"; +select count(*) from orc_ppd where s IN ("a", "z"); + +-- INPUT_RECORDS: 100 +select count(*) from orc_ppd where s = "sarah ovid"; + +-- INPUT_RECORDS: 1100 +select count(*) from orc_ppd where s = "wendy king"; + +-- INPUT_RECORDS: 1000 +select count(*) from orc_ppd where s = "wendy king" and t < 0; + +-- INPUT_RECORDS: 100 +select count(*) from orc_ppd where s = "wendy king" and t > 100; diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out new file mode 100644 index 0000000..1a57d14 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out @@ -0,0 +1,1261 @@ +PREHOOK: query: CREATE TABLE staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@staging +POSTHOOK: query: CREATE TABLE staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@staging +PREHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_ppd_staging +PREHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@orc_ppd_staging +POSTHOOK: Lineage: orc_ppd_staging.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +PREHOOK: query: -- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values +-- which makes it hard to test bloom filters +insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: -- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values +-- which makes it hard to test bloom filters +insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@orc_ppd_staging +POSTHOOK: Lineage: orc_ppd_staging.b EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.bin EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.d EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.dec EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.f EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.i EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [] +PREHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@orc_ppd_staging +POSTHOOK: Lineage: orc_ppd_staging.b SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.bin EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.d SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.dec EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.f EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [] +PREHOOK: query: CREATE TABLE orc_ppd(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_ppd +POSTHOOK: query: CREATE TABLE orc_ppd(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_ppd +PREHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd_staging +PREHOOK: Output: default@orc_ppd +POSTHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_ppd_staging +POSTHOOK: Output: default@orc_ppd +POSTHOOK: Lineage: orc_ppd.b SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.bin SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: orc_ppd.bo SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: orc_ppd.c EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd.d SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: orc_ppd.da EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd.dec SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Lineage: orc_ppd.f SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: orc_ppd.i SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_ppd.s SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd.si SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.t SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.ts SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd.v EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] +PREHOOK: query: describe formatted orc_ppd +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_ppd +POSTHOOK: query: describe formatted orc_ppd +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_ppd +# col_name data_type comment + +t tinyint +si smallint +i int +b bigint +f float +d double +bo boolean +s string +c char(50) +v varchar(50) +da date +ts timestamp +dec decimal(4,2) +bin binary + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 2100 + orc.bloom.filter.columns * + orc.row.index.stride 1000 + rawDataSize 1223514 + totalSize 60010 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Row group statistics for column t: +-- Entry 0: count: 994 hasNull: true min: -10 max: 54 sum: 26014 positions: 0,0,0,0,0,0,0 +-- Entry 1: count: 1000 hasNull: false min: 54 max: 118 sum: 86812 positions: 0,2,124,0,0,116,11 +-- Entry 2: count: 100 hasNull: false min: 118 max: 127 sum: 12151 positions: 0,4,119,0,0,244,19 + +-- INPUT_RECORDS: 2100 (all row groups) +select count(*) from orc_ppd +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 16711 + HDFS_BYTES_WRITTEN: 104 + HDFS_READ_OPS: 5 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + METADATA_CACHE_MISS: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +2100 +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + RECORDS_OUT_0: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 638 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + ALLOCATED_BYTES: 524288 + ALLOCATED_USED_BYTES: 269 + CACHE_MISS_BYTES: 249 + METADATA_CACHE_HIT: 1 + METADATA_CACHE_MISS: 1 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 1000 + SELECTED_ROWGROUPS: 1 +8 +PREHOOK: query: select count(*) from orc_ppd where t <=> 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 1000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +22 +PREHOOK: query: select count(*) from orc_ppd where t <=> 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 1000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +16 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = "54" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 2000 + SELECTED_ROWGROUPS: 2 +18 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = -10.0 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 1000 + SELECTED_ROWGROUPS: 1 +1 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = cast(53 as float) +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 0 + HDFS_READ_OPS: 0 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 0 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 1000 + SELECTED_ROWGROUPS: 1 +32 +PREHOOK: query: select count(*) from orc_ppd where t = cast(53 as double) +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 1000 + SELECTED_ROWGROUPS: 1 +32 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t < 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 104 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 2000 + SELECTED_ROWGROUPS: 2 +1697 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t < 100 and t > 98 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 0 + HDFS_READ_OPS: 0 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 0 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 1000 + SELECTED_ROWGROUPS: 1 +12 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t <= 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 104 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 2000 + SELECTED_ROWGROUPS: 2 +1713 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t is null +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 1000 + SELECTED_ROWGROUPS: 1 +6 +PREHOOK: query: -- INPUT_RECORDS: 1100 (2 row groups) +select count(*) from orc_ppd where t in (5, 120) +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 0 + HDFS_READ_OPS: 0 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 0 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 1100 + SELECTED_ROWGROUPS: 2 +50 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t between 60 and 80 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 103 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 1000 + SELECTED_ROWGROUPS: 1 +318 +PREHOOK: query: -- bloom filter tests +-- INPUT_RECORDS: 0 +select count(*) from orc_ppd where t = -100 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + RECORDS_OUT_0: 1 +0 +PREHOOK: query: select count(*) from orc_ppd where t <=> -100 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + RECORDS_OUT_0: 1 +0 +PREHOOK: query: select count(*) from orc_ppd where t = 125 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 100 + SELECTED_ROWGROUPS: 1 +0 +PREHOOK: query: select count(*) from orc_ppd where t IN (-100, 125, 200) +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 100 + SELECTED_ROWGROUPS: 1 +0 +PREHOOK: query: -- Row group statistics for column s: +-- Entry 0: count: 1000 hasNull: false min: max: zach young sum: 12907 positions: 0,0,0 +-- Entry 1: count: 1000 hasNull: false min: alice allen max: zach zipper sum: 12704 positions: 0,1611,191 +-- Entry 2: count: 100 hasNull: false min: bob davidson max: zzz sum: 1281 positions: 0,3246,373 + +-- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where s > "zzz" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + RECORDS_OUT_0: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where s = "zach young" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 4402 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + ALLOCATED_BYTES: 786432 + ALLOCATED_USED_BYTES: 11299 + CACHE_HIT_BYTES: 0 + CACHE_MISS_BYTES: 3980 + METADATA_CACHE_HIT: 1 + METADATA_CACHE_MISS: 1 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +2 +PREHOOK: query: select count(*) from orc_ppd where s <=> "zach zipper" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 1000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: select count(*) from orc_ppd where s <=> "" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 22574 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 5 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 1000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: -- INPUT_RECORDS: 0 +select count(*) from orc_ppd where s is null +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + RECORDS_OUT_0: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 2100 +select count(*) from orc_ppd where s is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 104 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +2100 +PREHOOK: query: -- INPUT_RECORDS: 0 +select count(*) from orc_ppd where s = cast("zach young" as char(50)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 1100 + SELECTED_ROWGROUPS: 2 +0 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where s = cast("zach young" as char(10)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +2 +PREHOOK: query: select count(*) from orc_ppd where s = cast("zach young" as varchar(10)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 0 + HDFS_READ_OPS: 0 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 0 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +2 +PREHOOK: query: select count(*) from orc_ppd where s = cast("zach young" as varchar(50)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +2 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where s < "b" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 2000 + SELECTED_ROWGROUPS: 2 +81 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where s > "alice" and s < "bob" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 0 + HDFS_READ_OPS: 0 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 0 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 2000 + SELECTED_ROWGROUPS: 2 +74 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where s in ("alice allen", "") +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 2000 + SELECTED_ROWGROUPS: 2 +12 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where s between "" and "alice allen" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 0 + HDFS_READ_OPS: 0 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 0 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 2000 + SELECTED_ROWGROUPS: 2 +13 +PREHOOK: query: -- INPUT_RECORDS: 100 (1 row group) +select count(*) from orc_ppd where s between "zz" and "zzz" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 100 + SELECTED_ROWGROUPS: 1 +1 +PREHOOK: query: -- INPUT_RECORDS: 1100 (2 row groups) +select count(*) from orc_ppd where s between "zach zipper" and "zzz" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 1100 + SELECTED_ROWGROUPS: 2 +7 +PREHOOK: query: -- bloom filter tests +-- INPUT_RECORDS: 0 +select count(*) from orc_ppd where s = "hello world" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 0 + HDFS_READ_OPS: 0 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 0 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +0 +PREHOOK: query: select count(*) from orc_ppd where s <=> "apache hive" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 18594 + HDFS_BYTES_WRITTEN: 0 + HDFS_READ_OPS: 3 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 0 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +0 +PREHOOK: query: select count(*) from orc_ppd where s IN ("a", "z") +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 0 + HDFS_READ_OPS: 0 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 0 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +0 +PREHOOK: query: -- INPUT_RECORDS: 100 +select count(*) from orc_ppd where s = "sarah ovid" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +2 +PREHOOK: query: -- INPUT_RECORDS: 1100 +select count(*) from orc_ppd where s = "wendy king" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +6 +PREHOOK: query: -- INPUT_RECORDS: 1000 +select count(*) from orc_ppd where s = "wendy king" and t < 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 0 + HDFS_READ_OPS: 0 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 0 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 4229 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 1000 + SELECTED_ROWGROUPS: 1 +2 +PREHOOK: query: -- INPUT_RECORDS: 100 +select count(*) from orc_ppd where s = "wendy king" and t > 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 0 + HDFS_READ_OPS: 0 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 0 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 4229 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 1100 + SELECTED_ROWGROUPS: 2 +2