diff --git a/itests/qtest/pom.xml b/itests/qtest/pom.xml index 306a0ed..17968e6 100644 --- a/itests/qtest/pom.xml +++ b/itests/qtest/pom.xml @@ -545,7 +545,7 @@ logFile="${project.build.directory}/testminitezclidrivergen.log" logDirectory="${project.build.directory}/qfile-results/clientpositive/" hadoopVersion="${hadoop.version}" - initScript="q_test_init.sql" + initScript="${initScript}" cleanupScript="q_test_cleanup.sql"/> diff --git a/llap-common/src/java/org/apache/hadoop/hive/llap/LlapUtil.java b/llap-common/src/java/org/apache/hadoop/hive/llap/LlapUtil.java + public static List<StatisticsData> getStatisticsForScheme(final String scheme, + final List<StatisticsData> stats) { + List<StatisticsData> result = new ArrayList<>(); + if (stats != null && scheme != null) { + for (StatisticsData s : stats) { + if (s.getScheme().equalsIgnoreCase(scheme)) { + result.add(s); + } + } + } + return result; + } + + public static Map<String, FileSystem.Statistics> getCombinedFileSystemStatistics() { + final List<FileSystem.Statistics> allStats = FileSystem.getAllStatistics(); + final Map<String, FileSystem.Statistics> result = new HashMap<>(); + for (FileSystem.Statistics statistics : allStats) { + final String scheme = statistics.getScheme(); + if (result.containsKey(scheme)) { + FileSystem.Statistics existing = result.get(scheme); + FileSystem.Statistics combined = combineFileSystemStatistics(existing, statistics); + result.put(scheme, combined); + } else { + result.put(scheme, statistics); + } + } + return result; + } + + private static FileSystem.Statistics combineFileSystemStatistics(final FileSystem.Statistics s1, + final FileSystem.Statistics s2) { + FileSystem.Statistics result = new FileSystem.Statistics(s1); + result.incrementReadOps(s2.getReadOps()); + result.incrementLargeReadOps(s2.getLargeReadOps()); + result.incrementWriteOps(s2.getWriteOps()); + result.incrementBytesRead(s2.getBytesRead()); + result.incrementBytesWritten(s2.getBytesWritten()); + return result; + } + + public static List<StatisticsData> cloneThreadLocalFileSystemStatistics() { + List<StatisticsData> result = new ArrayList<>(); + // the thread-local filesystem stats object is private and cannot be cloned.
So we copy its values into a new holder class instead + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + result.add(new StatisticsData(statistics.getScheme(), statistics.getThreadStatistics())); + } + return result; + } + + public static class StatisticsData { + long bytesRead; + long bytesWritten; + int readOps; + int largeReadOps; + int writeOps; + String scheme; + + public StatisticsData(String scheme, FileSystem.Statistics.StatisticsData fsStats) { + this.scheme = scheme; + this.bytesRead = fsStats.getBytesRead(); + this.bytesWritten = fsStats.getBytesWritten(); + this.readOps = fsStats.getReadOps(); + this.largeReadOps = fsStats.getLargeReadOps(); + this.writeOps = fsStats.getWriteOps(); + } + + public long getBytesRead() { + return bytesRead; + } + + public long getBytesWritten() { + return bytesWritten; + } + + public int getReadOps() { + return readOps; + } + + public int getLargeReadOps() { + return largeReadOps; + } + + public int getWriteOps() { + return writeOps; + } + + public String getScheme() { + return scheme; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(" scheme: ").append(scheme); + sb.append(" bytesRead: ").append(bytesRead); + sb.append(" bytesWritten: ").append(bytesWritten); + sb.append(" readOps: ").append(readOps); + sb.append(" largeReadOps: ").append(largeReadOps); + sb.append(" writeOps: ").append(writeOps); + return sb.toString(); + } + } } diff --git a/llap-common/src/java/org/apache/hadoop/hive/llap/counters/LlapIOCounters.java b/llap-common/src/java/org/apache/hadoop/hive/llap/counters/LlapIOCounters.java index 365ddab..1ed23ba 100644 --- a/llap-common/src/java/org/apache/hadoop/hive/llap/counters/LlapIOCounters.java +++ b/llap-common/src/java/org/apache/hadoop/hive/llap/counters/LlapIOCounters.java @@ -15,23 +15,43 @@ */ package org.apache.hadoop.hive.llap.counters; +import java.util.ArrayList; +import java.util.List; + /** * LLAP IO related counters.
*/ public enum LlapIOCounters { - NUM_VECTOR_BATCHES, - NUM_DECODED_BATCHES, - SELECTED_ROWGROUPS, - NUM_ERRORS, - ROWS_EMITTED, - METADATA_CACHE_HIT, - METADATA_CACHE_MISS, - CACHE_HIT_BYTES, - CACHE_MISS_BYTES, - ALLOCATED_BYTES, - ALLOCATED_USED_BYTES, - TOTAL_IO_TIME_NS, - DECODE_TIME_NS, - HDFS_TIME_NS, - CONSUMER_TIME_NS + NUM_VECTOR_BATCHES(true), + NUM_DECODED_BATCHES(true), + SELECTED_ROWGROUPS(true), + NUM_ERRORS(true), + ROWS_EMITTED(true), + METADATA_CACHE_HIT(true), + METADATA_CACHE_MISS(true), + CACHE_HIT_BYTES(true), + CACHE_MISS_BYTES(true), + ALLOCATED_BYTES(true), + ALLOCATED_USED_BYTES(true), + TOTAL_IO_TIME_NS(false), + DECODE_TIME_NS(false), + HDFS_TIME_NS(false), + CONSUMER_TIME_NS(false); + + // flag to indicate whether a counter is stable across different test runs + // (time-based counters vary from run to run and are not test safe) + private boolean testSafe; + + LlapIOCounters(final boolean testSafe) { + this.testSafe = testSafe; + } + + public static List<String> testSafeCounterNames() { + List<String> testSafeCounters = new ArrayList<>(); + for (LlapIOCounters counter : values()) { + if (counter.testSafe) { + testSafeCounters.add(counter.name()); + } + } + return testSafeCounters; + } } diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/counters/QueryFragmentCounters.java b/llap-server/src/java/org/apache/hadoop/hive/llap/counters/QueryFragmentCounters.java index a53ac61..0c858eb 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/counters/QueryFragmentCounters.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/counters/QueryFragmentCounters.java @@ -135,4 +135,8 @@ public String toString() { sb.append(" ]"); return sb.toString(); } + + public TezCounters getTezCounters() { + return tezCounters; + } } diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/StatsRecordingThreadPool.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/StatsRecordingThreadPool.java new file mode 100644 index 0000000..1395776 --- /dev/null +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/StatsRecordingThreadPool.java @@ -0,0 +1,204 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.llap.daemon.impl; + +import java.util.List; +import java.util.Map; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.FutureTask; +import java.util.concurrent.RunnableFuture; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hive.llap.LlapUtil; +import org.apache.hadoop.hive.llap.io.encoded.OrcEncodedDataReader; +import org.apache.tez.common.counters.FileSystemCounter; +import org.apache.tez.common.counters.TezCounters; +import org.apache.tez.runtime.task.TaskRunner2Callable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Custom thread pool implementation that records per-thread file system statistics in TezCounters. + * It captures before and after snapshots of the file system's thread-local statistics, + * computes the delta and updates the Tez task counters accordingly. + */ +public class StatsRecordingThreadPool extends ThreadPoolExecutor { + private static final Logger LOG = LoggerFactory.getLogger(StatsRecordingThreadPool.class); + // map that stores snapshot of FileSystem's thread local stats object before thread execution + private final Map<Long, List<LlapUtil.StatisticsData>> threadsStatsBefore; + // uncaught exception handler that will be set for all threads before execution + private Thread.UncaughtExceptionHandler uncaughtExceptionHandler; + + public StatsRecordingThreadPool(final int corePoolSize, final int maximumPoolSize, + final long keepAliveTime, + final TimeUnit unit, + final BlockingQueue<Runnable> workQueue, + final ThreadFactory threadFactory) { + this(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue, threadFactory, null); + } + + public StatsRecordingThreadPool(final int corePoolSize, final int maximumPoolSize, + final long keepAliveTime, + final TimeUnit unit, + final BlockingQueue<Runnable> workQueue, + final ThreadFactory threadFactory, Thread.UncaughtExceptionHandler handler) { + super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue, threadFactory); + this.threadsStatsBefore = new ConcurrentHashMap<>(corePoolSize); + this.uncaughtExceptionHandler = handler; + } + + @Override + protected <T> RunnableFuture<T> newTaskFor(final Callable<T> callable) { + return new FutureTask<T>(new WrappedCallable<T>(callable, threadsStatsBefore)); + } + + @Override + protected void beforeExecute(final Thread t, final Runnable r) { + super.beforeExecute(t, r); + // setup uncaught exception handler for thread + if (uncaughtExceptionHandler != null) { + t.setUncaughtExceptionHandler(uncaughtExceptionHandler); + } + // clone thread local file system statistics + threadsStatsBefore.put(t.getId(), LlapUtil.cloneThreadLocalFileSystemStatistics()); + + // NOTE: we don't use afterExecute to capture file system statistics post execution because + // of callables which run on a separate thread and may finish after the afterExecute invocation. + // Also, because of the way Tez sends task completion notifications, there is a race between + // counter updates and the last heartbeat that finalizes the task counters + } + + public void setUncaughtExceptionHandler(Thread.UncaughtExceptionHandler handler) { + this.uncaughtExceptionHandler = handler; + } + + /** + * Callable that wraps the actual callable submitted to the thread pool and updates the + * file system counters in a finally block. + * + * @param <V> result type of the actual callable + */ + private static class WrappedCallable<V> implements Callable<V> { + private Callable<V> actualCallable; + private final Map<Long, List<LlapUtil.StatisticsData>> threadsStatsBefore; + + public WrappedCallable(final Callable<V> callable, + final Map<Long, List<LlapUtil.StatisticsData>> threadsStatsBefore) { + this.actualCallable = callable; + this.threadsStatsBefore = threadsStatsBefore; + } + + @Override + public V call() throws Exception { + try { + return actualCallable.call(); + } finally { + updateFileSystemCounters(actualCallable); + } + } + + private void updateFileSystemCounters(final Callable<V> actualCallable) { + Thread thread = Thread.currentThread(); + try { + TezCounters tezCounters = null; + // add tez counters for task execution and llap io + if (actualCallable instanceof TaskRunner2Callable) { + TaskRunner2Callable taskRunner2Callable = (TaskRunner2Callable) actualCallable; + // counters for task execution side + tezCounters = taskRunner2Callable.addAndGetTezCounter(FileSystemCounter.class.getName()); + } else if (actualCallable instanceof OrcEncodedDataReader) { + // counters for llap io side + tezCounters = ((OrcEncodedDataReader) actualCallable).getTezCounters(); + } + + if (tezCounters != null) { + List<LlapUtil.StatisticsData> statsBefore = threadsStatsBefore.get(thread.getId()); + if (statsBefore != null) { + // if there are multiple stats for the same scheme (from different NameNodes), this + // method will squash them together + Map<String, FileSystem.Statistics> schemeToStats = LlapUtil + .getCombinedFileSystemStatistics(); + for (Map.Entry<String, FileSystem.Statistics> entry : schemeToStats.entrySet()) { + final String scheme = entry.getKey(); + FileSystem.Statistics statistics = entry.getValue(); + FileSystem.Statistics.StatisticsData threadFSStats = statistics + .getThreadStatistics(); + List<LlapUtil.StatisticsData> allStatsBefore = LlapUtil + .getStatisticsForScheme(scheme, statsBefore); + long bytesReadDelta = 0; + long bytesWrittenDelta = 0; + long readOpsDelta = 0; + long largeReadOpsDelta = 0; + long writeOpsDelta = 0; + // there could be more schemes after execution, as execution might be accessing a + // different filesystem. So if we don't find a matching scheme before execution, we + // just use the post-execution values directly without computing a delta + if (allStatsBefore != null && !allStatsBefore.isEmpty()) { + for (LlapUtil.StatisticsData sb : allStatsBefore) { + bytesReadDelta += threadFSStats.getBytesRead() - sb.getBytesRead(); + bytesWrittenDelta += threadFSStats.getBytesWritten() - sb.getBytesWritten(); + readOpsDelta += threadFSStats.getReadOps() - sb.getReadOps(); + largeReadOpsDelta += threadFSStats.getLargeReadOps() - sb.getLargeReadOps(); + writeOpsDelta += threadFSStats.getWriteOps() - sb.getWriteOps(); + } + } else { + bytesReadDelta = threadFSStats.getBytesRead(); + bytesWrittenDelta = threadFSStats.getBytesWritten(); + readOpsDelta = threadFSStats.getReadOps(); + largeReadOpsDelta = threadFSStats.getLargeReadOps(); + writeOpsDelta = threadFSStats.getWriteOps(); + } + tezCounters.findCounter(scheme, FileSystemCounter.BYTES_READ) + .increment(bytesReadDelta); + tezCounters.findCounter(scheme, FileSystemCounter.BYTES_WRITTEN) + .increment(bytesWrittenDelta); + tezCounters.findCounter(scheme, FileSystemCounter.READ_OPS).increment(readOpsDelta); + tezCounters.findCounter(scheme, FileSystemCounter.LARGE_READ_OPS) + .increment(largeReadOpsDelta); + tezCounters.findCounter(scheme, FileSystemCounter.WRITE_OPS) + .increment(writeOpsDelta); + + if (LOG.isDebugEnabled()) { + LOG.debug("Updated stats: instance: {} thread name: {} thread id: {} scheme: {} " + + "bytesRead: {} bytesWritten: {} readOps: {} largeReadOps: {} writeOps: {}", + actualCallable.getClass().getSimpleName(), thread.getName(), thread.getId(), + scheme, bytesReadDelta, bytesWrittenDelta, readOpsDelta, largeReadOpsDelta, + writeOpsDelta); + } + } + } else { + LOG.warn("File system statistics snapshot before execution of thread is null."
+ + "Thread name: {} id: {} allStats: {}", thread.getName(), thread.getId(), + threadsStatsBefore); + } + } else { + LOG.warn("TezCounters is null for callable type: {}", + actualCallable.getClass().getSimpleName()); + } + } finally { + // remove the stored snapshot of file system statistics + threadsStatsBefore.remove(thread.getId()); + } + } + } +} diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java index 6c853a6..f1d3c9d 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java @@ -23,7 +23,9 @@ import java.util.HashMap; import java.util.Map; import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; @@ -39,6 +41,7 @@ import org.apache.hadoop.hive.llap.metrics.LlapDaemonExecutorMetrics; import org.apache.hadoop.hive.llap.protocol.LlapTaskUmbilicalProtocol; import org.apache.hadoop.hive.llap.tez.Converters; +import org.apache.hadoop.hive.llap.tezplugins.LlapTezUtils; import org.apache.hadoop.hive.ql.io.IOContextMap; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.net.NetUtils; @@ -46,6 +49,7 @@ import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.tez.common.CallableWithNdc; import org.apache.tez.common.TezCommonUtils; import org.apache.tez.common.security.JobTokenIdentifier; @@ -100,7 +104,7 @@ private final FragmentCompletionHandler fragmentCompletionHanler; private volatile TezTaskRunner2 taskRunner; private volatile TaskReporterInterface taskReporter; - private volatile ListeningExecutorService executor; + private volatile ExecutorService executor; private LlapTaskUmbilicalProtocol umbilical; private volatile long startTime; private volatile String threadName; @@ -181,12 +185,13 @@ protected TaskRunner2Result callInternal() throws Exception { } // TODO This executor seems unnecessary. Here and TezChild - ExecutorService executorReal = Executors.newFixedThreadPool(1, + executor = new StatsRecordingThreadPool(1, 1, + 0L, TimeUnit.MILLISECONDS, + new LinkedBlockingQueue(), new ThreadFactoryBuilder() .setDaemon(true) .setNameFormat("TezTaskRunner") .build()); - executor = MoreExecutors.listeningDecorator(executorReal); // TODO Consolidate this code with TezChild. 
runtimeWatch.start(); @@ -214,12 +219,7 @@ public LlapTaskUmbilicalProtocol run() throws Exception { } }); - TezTaskAttemptID taskAttemptID = taskSpec.getTaskAttemptID(); - TezTaskID taskId = taskAttemptID.getTaskID(); - TezVertexID tezVertexID = taskId.getVertexID(); - TezDAGID tezDAGID = tezVertexID.getDAGId(); - String fragFullId = Joiner.on('_').join(tezDAGID.getId(), tezVertexID.getId(), taskId.getId(), - taskAttemptID.getId()); + String fragFullId = LlapTezUtils.stripAttemptPrefix(taskSpec.getTaskAttemptID().toString()); taskReporter = new LlapTaskReporter( umbilical, confParams.amHeartbeatIntervalMsMax, diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java index 298f788..8cfbd95 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java @@ -22,16 +22,18 @@ import java.io.IOException; import java.util.LinkedList; import java.util.List; +import java.util.concurrent.ExecutorService; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.llap.ConsumerFeedback; -import org.apache.hadoop.hive.llap.DebugUtils; import org.apache.hadoop.hive.llap.counters.FragmentCountersMap; import org.apache.hadoop.hive.llap.counters.LlapIOCounters; import org.apache.hadoop.hive.llap.counters.QueryFragmentCounters; +import org.apache.hadoop.hive.llap.daemon.impl.StatsRecordingThreadPool; import org.apache.hadoop.hive.llap.io.decode.ColumnVectorProducer; import org.apache.hadoop.hive.llap.io.decode.ReadPipeline; +import org.apache.hadoop.hive.llap.tezplugins.LlapTezUtils; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -57,13 +59,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.util.concurrent.FutureCallback; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.ListeningExecutorService; - public class LlapInputFormat implements InputFormat, VectorizedInputFormatInterface, SelfDescribingInputFormatInterface, AvoidSplitCombination { @@ -71,12 +66,12 @@ private final InputFormat sourceInputFormat; private final AvoidSplitCombination sourceASC; private final ColumnVectorProducer cvp; - private final ListeningExecutorService executor; + private final ExecutorService executor; private final String hostName; @SuppressWarnings("rawtypes") LlapInputFormat(InputFormat sourceInputFormat, ColumnVectorProducer cvp, - ListeningExecutorService executor) { + ExecutorService executor) { // TODO: right now, we do nothing with source input format, ORC-only in the first cut. // We'd need to plumb it thru and use it to get data to cache/etc. 
assert sourceInputFormat instanceof OrcInputFormat; @@ -153,19 +148,13 @@ public LlapRecordReader( this.columnIds = includedCols; this.sarg = ConvertAstToSearchArg.createFromConf(job); this.columnNames = ColumnProjectionUtils.getReadColumnNames(job); - String dagId = job.get("tez.mapreduce.dag.index"); - String vertexId = job.get("tez.mapreduce.vertex.index"); - String taskId = job.get("tez.mapreduce.task.index"); - String taskAttemptId = job.get("tez.mapreduce.task.attempt.index"); + String appId = LlapTezUtils.getFragmentId(job); TezCounters taskCounters = null; - if (dagId != null && vertexId != null && taskId != null && taskAttemptId != null) { - String fullId = Joiner.on('_').join(dagId, vertexId, taskId, taskAttemptId); - taskCounters = FragmentCountersMap.getCountersForFragment(fullId); - LOG.info("Received dagid_vertexid_taskid_attempid: {}", fullId); + if (appId != null) { + taskCounters = FragmentCountersMap.getCountersForFragment(appId); + LOG.info("Received full application id: {}", appId); } else { - LOG.warn("Not using tez counters as some identifier is null." + - " dagId: {} vertexId: {} taskId: {} taskAttempId: {}", - dagId, vertexId, taskId, taskAttemptId); + LOG.warn("Not using tez counters as application id string is null"); } this.counters = new QueryFragmentCounters(job, taskCounters); this.counters.setDesc(QueryFragmentCounters.Desc.MACHINE, hostName); @@ -233,17 +222,12 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti } - private final class UncaughtErrorHandler implements FutureCallback { + private final class IOUncaughtExceptionHandler implements Thread.UncaughtExceptionHandler { @Override - public void onSuccess(Void result) { - // Successful execution of reader is supposed to call setDone. - } - - @Override - public void onFailure(Throwable t) { - // Reader is not supposed to throw AFTER calling setError. - LlapIoImpl.LOG.error("Unhandled error from reader thread " + t.getMessage()); - setError(t); + public void uncaughtException(final Thread t, final Throwable e) { + LlapIoImpl.LOG.error("Unhandled error from reader thread. threadName: {} threadId: {}" + + " Message: {}", t.getName(), t.getId(), e.getMessage()); + setError(e); } } @@ -252,9 +236,12 @@ private void startRead() { ReadPipeline rp = cvp.createReadPipeline( this, split, columnIds, sarg, columnNames, counters); feedback = rp; - ListenableFuture future = executor.submit(rp.getReadCallable()); - // TODO: we should NOT do this thing with handler. Reader needs to do cleanup in most cases. 
- Futures.addCallback(future, new UncaughtErrorHandler()); + if (executor instanceof StatsRecordingThreadPool) { + // Every thread created by this thread pool will use the same handler + ((StatsRecordingThreadPool) executor) + .setUncaughtExceptionHandler(new IOUncaughtExceptionHandler()); + } + executor.submit(rp.getReadCallable()); } ColumnVectorBatch nextCvb() throws InterruptedException, IOException { diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java index 9316dff..9deef0c 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java @@ -22,10 +22,15 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; import javax.management.ObjectName; +import org.apache.hadoop.hive.llap.daemon.impl.StatsRecordingThreadPool; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -69,7 +74,7 @@ private static final String MODE_CACHE = "cache", MODE_ALLOCATOR = "allocator"; private final ColumnVectorProducer cvp; - private final ListeningExecutorService executor; + private final ExecutorService executor; private final LlapDaemonCacheMetrics cacheMetrics; private final LlapDaemonIOMetrics ioMetrics; private ObjectName buddyAllocatorMXBean; @@ -137,8 +142,10 @@ private LlapIoImpl(Configuration conf) throws IOException { } // IO thread pool. Listening is used for unhandled errors for now (TODO: remove?) int numThreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.LLAP_IO_THREADPOOL_SIZE); - executor = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(numThreads, - new ThreadFactoryBuilder().setNameFormat("IO-Elevator-Thread-%d").setDaemon(true).build())); + executor = new StatsRecordingThreadPool(numThreads, numThreads, + 0L, TimeUnit.MILLISECONDS, + new LinkedBlockingQueue<Runnable>(), + new ThreadFactoryBuilder().setNameFormat("IO-Elevator-Thread-%d").setDaemon(true).build()); // TODO: this should depend on input format and be in a map, or something.
this.cvp = new OrcColumnVectorProducer( metadataCache, orcCache, bufferManager, conf, cacheMetrics, ioMetrics); diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java index 69c0647..2164a3a 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.llap.metrics.LlapDaemonIOMetrics; import org.apache.orc.impl.DataReaderProperties; import org.apache.orc.impl.OrcIndex; +import org.apache.tez.common.counters.TezCounters; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -911,4 +912,8 @@ public OrcIndex readRowIndex(StripeInformation stripe, return orcDataReader.readStripeFooter(stripe); } } + + public TezCounters getTezCounters() { + return counters.getTezCounters(); + } } diff --git a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTezUtils.java b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTezUtils.java index 2c3e53c..c034432 100644 --- a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTezUtils.java +++ b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTezUtils.java @@ -14,11 +14,24 @@ package org.apache.hadoop.hive.llap.tezplugins; +import java.text.NumberFormat; + import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.tez.dag.records.TezDAGID; +import org.apache.tez.dag.records.TezTaskAttemptID; +import org.apache.tez.dag.records.TezTaskID; +import org.apache.tez.dag.records.TezVertexID; +import org.apache.tez.mapreduce.hadoop.MRHelpers; +import org.apache.tez.mapreduce.hadoop.MRInputHelpers; +import org.apache.tez.mapreduce.hadoop.MRJobConfig; import org.apache.tez.mapreduce.input.MRInput; import org.apache.tez.mapreduce.input.MRInputLegacy; import org.apache.tez.mapreduce.input.MultiMRInput; +import com.google.common.base.Joiner; + @InterfaceAudience.Private public class LlapTezUtils { public static boolean isSourceOfInterest(String inputClassName) { @@ -26,4 +39,38 @@ public static boolean isSourceOfInterest(String inputClassName) { return !(inputClassName.equals(MRInputLegacy.class.getName()) || inputClassName.equals( MultiMRInput.class.getName()) || inputClassName.equals(MRInput.class.getName())); } + + // FIXME: This is no longer required after TEZ-3290 + public static String getFragmentId(final JobConf job) { + int dagIdx = MRInputHelpers.getDagIndex(job); + int vertexIdx = MRInputHelpers.getVertexIndex(job); + int taskIdx = MRInputHelpers.getTaskIndex(job); + int taskAttemptIdx = MRInputHelpers.getTaskAttemptIndex(job); + String appId = MRInputHelpers.getApplicationIdString(job); + ApplicationId applicationId = createApplicationIdFromString(appId); + if (applicationId != null) { + TezTaskAttemptID tezTaskAttemptID = TezTaskAttemptID.getInstance( + TezTaskID.getInstance(TezVertexID.getInstance( + TezDAGID.getInstance(applicationId, dagIdx), vertexIdx), taskIdx), taskAttemptIdx); + return stripAttemptPrefix(tezTaskAttemptID.toString()); + } + return null; + } + + private static ApplicationId createApplicationIdFromString(final String appIdStr) { + String[] tokens = appIdStr.split("_"); + if (tokens.length == 3) { + long clusterTimestamp = 
Long.parseLong(tokens[1]); + int appId = Integer.parseInt(tokens[2]); + return ApplicationId.newInstance(clusterTimestamp, appId); + } + return null; + } + + public static String stripAttemptPrefix(final String s) { + if (s.startsWith(TezTaskAttemptID.ATTEMPT)) { + return s.substring(TezTaskAttemptID.ATTEMPT.length() + 1); + } + return s; + } } diff --git a/pom.xml b/pom.xml index 63a5ae1..b690c47 100644 --- a/pom.xml +++ b/pom.xml @@ -173,7 +173,7 @@ 1.0.1 1.7.10 4.0.4 - 0.8.3 + 0.8.4-SNAPSHOT 0.90.2-incubating 2.2.0 1.6.0 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java index 838f320..d87d301 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java @@ -36,6 +36,7 @@ import java.util.SortedSet; import java.util.TreeSet; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.llap.counters.LlapIOCounters; import org.apache.hadoop.hive.ql.Context; @@ -47,6 +48,7 @@ import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; +import org.apache.tez.common.counters.FileSystemCounter; import org.apache.tez.common.counters.TaskCounter; import org.apache.tez.common.counters.TezCounter; import org.apache.tez.common.counters.TezCounters; @@ -80,6 +82,7 @@ private static final String QUERY_EXEC_SUMMARY_HEADER = "Query Execution Summary"; private static final String TASK_SUMMARY_HEADER = "Task Execution Summary"; private static final String LLAP_IO_SUMMARY_HEADER = "LLAP IO Summary"; + private static final String FS_COUNTERS_SUMMARY_HEADER = "FileSystem Counters Summary"; // keep this within 80 chars width. If more columns needs to be added then update min terminal // width requirement and SEPARATOR width accordingly @@ -106,6 +109,9 @@ "VERTICES", "ROWGROUPS", "META_HIT", "META_MISS", "DATA_HIT", "DATA_MISS", "ALLOCATION", "USED", "TOTAL_IO"); + // FileSystem counters + private static final String FS_COUNTERS_HEADER_FORMAT = "%10s %15s %13s %18s %18s %13s"; + // Methods summary private static final String OPERATION_SUMMARY = "%-35s %9s"; private static final String OPERATION = "OPERATION"; @@ -391,6 +397,10 @@ public int monitorExecution(final DAGClient dagClient, HiveConf conf, console.printInfo(LLAP_IO_SUMMARY_HEADER); printLlapIOSummary(progressMap, console, dagClient); console.printInfo(SEPARATOR); + console.printInfo(""); + + console.printInfo(FS_COUNTERS_SUMMARY_HEADER); + printFSCountersSummary(progressMap, console, dagClient); } console.printInfo(""); @@ -697,6 +707,62 @@ private void printLlapIOSummary(Map progressMap, LogHelper con } } + private void printFSCountersSummary(Map progressMap, LogHelper console, + DAGClient dagClient) { + SortedSet keys = new TreeSet<>(progressMap.keySet()); + Set statusOptions = new HashSet<>(1); + statusOptions.add(StatusGetOpts.GET_COUNTERS); + // Assuming FileSystem.getAllStatistics() returns all schemes that are accessed on task side + // as well. If not, we need a way to get all the schemes that are accessed by the tez task/llap. 
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + final String scheme = statistics.getScheme().toUpperCase(); + final String fsCountersHeader = String.format(FS_COUNTERS_HEADER_FORMAT, + "VERTICES", "BYTES_READ", "READ_OPS", "LARGE_READ_OPS", "BYTES_WRITTEN", "WRITE_OPS"); + + console.printInfo(""); + reprintLineWithColorAsBold("Scheme: " + scheme, Ansi.Color.RED); + console.printInfo(SEPARATOR); + reprintLineWithColorAsBold(fsCountersHeader, Ansi.Color.CYAN); + console.printInfo(SEPARATOR); + + for (String vertexName : keys) { + TezCounters vertexCounters = null; + try { + vertexCounters = dagClient.getVertexStatus(vertexName, statusOptions) + .getVertexCounters(); + } catch (IOException e) { + // best attempt, shouldn't really kill DAG for this + } catch (TezException e) { + // best attempt, shouldn't really kill DAG for this + } + if (vertexCounters != null) { + final String counterGroup = FileSystemCounter.class.getName(); + final long bytesRead = getCounterValueByGroupName(vertexCounters, + counterGroup, scheme + "_" + FileSystemCounter.BYTES_READ.name()); + final long bytesWritten = getCounterValueByGroupName(vertexCounters, + counterGroup, scheme + "_" + FileSystemCounter.BYTES_WRITTEN.name()); + final long readOps = getCounterValueByGroupName(vertexCounters, + counterGroup, scheme + "_" + FileSystemCounter.READ_OPS.name()); + final long largeReadOps = getCounterValueByGroupName(vertexCounters, + counterGroup, scheme + "_" + FileSystemCounter.LARGE_READ_OPS.name()); + final long writeOps = getCounterValueByGroupName(vertexCounters, + counterGroup, scheme + "_" + FileSystemCounter.WRITE_OPS.name()); + + String fsCountersSummary = String.format(FS_COUNTERS_HEADER_FORMAT, + vertexName, + humanReadableByteCount(bytesRead), + readOps, + largeReadOps, + humanReadableByteCount(bytesWritten), + writeOps); + console.printInfo(fsCountersSummary); + } + } + + console.printInfo(SEPARATOR); + } + } + private void printStatusInPlace(Map progressMap, long startTime, boolean vextexStatusFromAM, DAGClient dagClient) { StringBuilder reportBuffer = new StringBuilder(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java index 81bda08..412f45c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java @@ -19,6 +19,8 @@ import java.util.List; +import org.apache.hadoop.hive.llap.counters.LlapIOCounters; +import org.apache.tez.common.counters.FileSystemCounter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf; @@ -57,12 +59,30 @@ public void run(HookContext hookContext) throws Exception { LOG.info("Printing summary for tez task: " + tezTask.getName()); TezCounters counters = tezTask.getTezCounters(); if (counters != null) { + String hiveCountersGroup = HiveConf.getVar(conf, HiveConf.ConfVars.HIVECOUNTERGROUP); for (CounterGroup group : counters) { - if ("HIVE".equals(group.getDisplayName())) { + if (hiveCountersGroup.equals(group.getDisplayName())) { console.printError(tezTask.getId() + " HIVE COUNTERS:"); for (TezCounter counter : group) { console.printError(" " + counter.getDisplayName() + ": " + counter.getValue()); } + } else if (group.getName().equals(FileSystemCounter.class.getName())) { + console.printError(tezTask.getId() + " FILE SYSTEM COUNTERS:"); + for (TezCounter counter : group) { + // HDFS 
counters should be relatively consistent across test runs when compared to + // local file system counters + if (counter.getName().contains("HDFS")) { + console.printError(" " + counter.getDisplayName() + ": " + counter.getValue()); + } + } + } else if (group.getName().equals(LlapIOCounters.class.getName())) { + console.printError(tezTask.getId() + " LLAP IO COUNTERS:"); + List testSafeCounters = LlapIOCounters.testSafeCounterNames(); + for (TezCounter counter : group) { + if (testSafeCounters.contains(counter.getDisplayName())) { + console.printError(" " + counter.getDisplayName() + ": " + counter.getValue()); + } + } } } } diff --git a/ql/src/test/queries/clientpositive/orc_llap_counters.q b/ql/src/test/queries/clientpositive/orc_llap_counters.q new file mode 100644 index 0000000..1bd55d3 --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_llap_counters.q @@ -0,0 +1,182 @@ +set hive.mapred.mode=nonstrict; +SET hive.optimize.index.filter=true; +SET hive.cbo.enable=false; +SET hive.vectorized.execution.enabled=true; +SET hive.llap.io.enabled=true; + +CREATE TABLE staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging; +LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging; + +CREATE TABLE orc_ppd_staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); + +insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s; + +-- just to introduce a gap in min/max range for bloom filters. 
The dataset has contiguous values +-- which makes it hard to test bloom filters +insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1; +insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1; + +CREATE TABLE orc_ppd(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); + +insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s; + +describe formatted orc_ppd; + +SET hive.fetch.task.conversion=none; +SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter; + +-- Row group statistics for column t: +-- Entry 0: count: 994 hasNull: true min: -10 max: 54 sum: 26014 positions: 0,0,0,0,0,0,0 +-- Entry 1: count: 1000 hasNull: false min: 54 max: 118 sum: 86812 positions: 0,2,124,0,0,116,11 +-- Entry 2: count: 100 hasNull: false min: 118 max: 127 sum: 12151 positions: 0,4,119,0,0,244,19 + +-- INPUT_RECORDS: 2100 (all row groups) +select count(*) from orc_ppd; + +-- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127; + +-- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55; +select count(*) from orc_ppd where t <=> 50; +select count(*) from orc_ppd where t <=> 100; + +-- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = "54"; + +-- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = -10.0; + +-- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = cast(53 as float); +select count(*) from orc_ppd where t = cast(53 as double); + +-- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t < 100; + +-- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t < 100 and t > 98; + +-- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t <= 100; + +-- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t is null; + +-- INPUT_RECORDS: 1100 (2 row groups) +select count(*) from orc_ppd where t in (5, 120); + +-- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t between 60 and 80; + +-- bloom filter tests +-- INPUT_RECORDS: 0 +select count(*) from orc_ppd where t = -100; +select count(*) from orc_ppd where t <=> -100; +select count(*) from orc_ppd where t = 125; +select count(*) from orc_ppd where t IN (-100, 125, 200); + +-- Row group statistics for column s: +-- Entry 0: count: 1000 hasNull: false min: max: zach young sum: 12907 positions: 0,0,0 +-- Entry 1: count: 1000 hasNull: false min: alice allen max: zach zipper sum: 12704 positions: 0,1611,191 +-- Entry 2: count: 100 hasNull: false min: bob davidson max: zzz sum: 1281 positions: 0,3246,373 + +-- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where s > "zzz"; + +-- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where s = "zach young"; +select count(*) from orc_ppd where s <=> "zach zipper"; +select count(*) from orc_ppd where s <=> ""; + +-- INPUT_RECORDS: 0 +select count(*) from orc_ppd where s is null; + +-- 
INPUT_RECORDS: 2100 +select count(*) from orc_ppd where s is not null; + +-- INPUT_RECORDS: 0 +select count(*) from orc_ppd where s = cast("zach young" as char(50)); + +-- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where s = cast("zach young" as char(10)); +select count(*) from orc_ppd where s = cast("zach young" as varchar(10)); +select count(*) from orc_ppd where s = cast("zach young" as varchar(50)); + +-- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where s < "b"; + +-- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where s > "alice" and s < "bob"; + +-- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where s in ("alice allen", ""); + +-- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where s between "" and "alice allen"; + +-- INPUT_RECORDS: 100 (1 row group) +select count(*) from orc_ppd where s between "zz" and "zzz"; + +-- INPUT_RECORDS: 1100 (2 row groups) +select count(*) from orc_ppd where s between "zach zipper" and "zzz"; + +-- bloom filter tests +-- INPUT_RECORDS: 0 +select count(*) from orc_ppd where s = "hello world"; +select count(*) from orc_ppd where s <=> "apache hive"; +select count(*) from orc_ppd where s IN ("a", "z"); + +-- INPUT_RECORDS: 100 +select count(*) from orc_ppd where s = "sarah ovid"; + +-- INPUT_RECORDS: 1100 +select count(*) from orc_ppd where s = "wendy king"; + +-- INPUT_RECORDS: 1000 +select count(*) from orc_ppd where s = "wendy king" and t < 0; + +-- INPUT_RECORDS: 100 +select count(*) from orc_ppd where s = "wendy king" and t > 100; diff --git a/ql/src/test/queries/clientpositive/orc_llap_counters1.q b/ql/src/test/queries/clientpositive/orc_llap_counters1.q new file mode 100644 index 0000000..06d6c4f --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_llap_counters1.q @@ -0,0 +1,83 @@ +set hive.mapred.mode=nonstrict; +SET hive.optimize.index.filter=true; +SET hive.cbo.enable=false; +SET hive.vectorized.execution.enabled=true; +SET hive.llap.io.enabled=true; + +CREATE TABLE staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging; +LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging; + +CREATE TABLE orc_ppd_staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); + +insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s; + +-- just to introduce a gap in min/max range for bloom filters. 
The dataset has contiguous values +-- which makes it hard to test bloom filters +insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1; +insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1; + +CREATE TABLE orc_ppd(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); + +insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s; + +describe formatted orc_ppd; + +SET hive.fetch.task.conversion=none; +SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter; + +-- Row group statistics for column t: +-- Entry 0: count: 994 hasNull: true min: -10 max: 54 sum: 26014 positions: 0,0,0,0,0,0,0 +-- Entry 1: count: 1000 hasNull: false min: 54 max: 118 sum: 86812 positions: 0,2,124,0,0,116,11 +-- Entry 2: count: 100 hasNull: false min: 118 max: 127 sum: 12151 positions: 0,4,119,0,0,244,19 + +-- INPUT_RECORDS: 2100 (all row groups) +select count(*) from orc_ppd where t > -100; + +-- 100% LLAP cache hit +select count(*) from orc_ppd where t > -100; + +DROP TABLE staging; +DROP TABLE orc_ppd_staging; +DROP TABLE orc_ppd; diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out new file mode 100644 index 0000000..6fe3a8c --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out @@ -0,0 +1,1245 @@ +PREHOOK: query: CREATE TABLE staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@staging +POSTHOOK: query: CREATE TABLE staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@staging +PREHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts 
timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_ppd_staging +PREHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@orc_ppd_staging +POSTHOOK: Lineage: orc_ppd_staging.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +PREHOOK: query: -- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values +-- which makes it hard to test bloom filters +insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: -- just to introduce a gap in min/max range for bloom filters. 
The dataset has contiguous values +-- which makes it hard to test bloom filters +insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@orc_ppd_staging +POSTHOOK: Lineage: orc_ppd_staging.b EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.bin EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.d EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.dec EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.f EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.i EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [] +PREHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@orc_ppd_staging +POSTHOOK: Lineage: orc_ppd_staging.b SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.bin EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.d SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.dec EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.f EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [] +PREHOOK: query: CREATE TABLE orc_ppd(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_ppd +POSTHOOK: query: CREATE TABLE orc_ppd(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_ppd +PREHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd_staging +PREHOOK: Output: default@orc_ppd +POSTHOOK: query: insert overwrite table orc_ppd 
select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_ppd_staging +POSTHOOK: Output: default@orc_ppd +POSTHOOK: Lineage: orc_ppd.b SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.bin SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: orc_ppd.bo SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: orc_ppd.c EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd.d SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: orc_ppd.da EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd.dec SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Lineage: orc_ppd.f SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: orc_ppd.i SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_ppd.s SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd.si SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.t SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.ts SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd.v EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] +PREHOOK: query: describe formatted orc_ppd +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_ppd +POSTHOOK: query: describe formatted orc_ppd +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_ppd +# col_name data_type comment + +t tinyint +si smallint +i int +b bigint +f float +d double +bo boolean +s string +c char(50) +v varchar(50) +da date +ts timestamp +dec decimal(4,2) +bin binary + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 2100 + orc.bloom.filter.columns * + orc.row.index.stride 1000 + rawDataSize 1223514 + totalSize 60010 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Row group statistics for column t: +-- Entry 0: count: 994 hasNull: true min: -10 max: 54 sum: 26014 positions: 0,0,0,0,0,0,0 +-- Entry 1: count: 1000 hasNull: false min: 54 max: 118 sum: 86812 positions: 0,2,124,0,0,116,11 +-- Entry 2: count: 100 hasNull: false min: 118 max: 127 sum: 12151 positions: 0,4,119,0,0,244,19 + +-- INPUT_RECORDS: 2100 (all row groups) +select count(*) from orc_ppd +PREHOOK: type: QUERY +PREHOOK: 
Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 16711 + HDFS_BYTES_WRITTEN: 104 + HDFS_READ_OPS: 5 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + METADATA_CACHE_MISS: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +2100 +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + RECORDS_OUT_0: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 638 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + ALLOCATED_BYTES: 524288 + ALLOCATED_USED_BYTES: 269 + CACHE_MISS_BYTES: 249 + METADATA_CACHE_HIT: 1 + METADATA_CACHE_MISS: 1 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 1000 + SELECTED_ROWGROUPS: 1 +8 +PREHOOK: query: select count(*) from orc_ppd where t <=> 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 17909 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 5 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 1000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +22 +PREHOOK: query: select count(*) from orc_ppd where t <=> 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 17909 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 5 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 1000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +16 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = "54" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 2000 + SELECTED_ROWGROUPS: 2 +18 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = -10.0 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE 
COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 1000 + SELECTED_ROWGROUPS: 1 +1 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = cast(53 as float) +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 1000 + SELECTED_ROWGROUPS: 1 +32 +PREHOOK: query: select count(*) from orc_ppd where t = cast(53 as double) +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 1000 + SELECTED_ROWGROUPS: 1 +32 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t < 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 104 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 2000 + SELECTED_ROWGROUPS: 2 +1697 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t < 100 and t > 98 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 1000 + SELECTED_ROWGROUPS: 1 +12 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t <= 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 104 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + 
CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 2000 + SELECTED_ROWGROUPS: 2 +1713 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t is null +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 1000 + SELECTED_ROWGROUPS: 1 +6 +PREHOOK: query: -- INPUT_RECORDS: 1100 (2 row groups) +select count(*) from orc_ppd where t in (5, 120) +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 17909 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 5 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 1100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +50 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t between 60 and 80 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 103 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 1000 + SELECTED_ROWGROUPS: 1 +318 +PREHOOK: query: -- bloom filter tests +-- INPUT_RECORDS: 0 +select count(*) from orc_ppd where t = -100 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + RECORDS_OUT_0: 1 +0 +PREHOOK: query: select count(*) from orc_ppd where t <=> -100 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + RECORDS_OUT_0: 1 +0 +PREHOOK: query: select count(*) from orc_ppd where t = 125 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 100 + SELECTED_ROWGROUPS: 1 +0 +PREHOOK: query: select count(*) from orc_ppd where t IN (-100, 125, 200) +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### 
+Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 17660 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 5 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +0 +PREHOOK: query: -- Row group statistics for column s: +-- Entry 0: count: 1000 hasNull: false min: max: zach young sum: 12907 positions: 0,0,0 +-- Entry 1: count: 1000 hasNull: false min: alice allen max: zach zipper sum: 12704 positions: 0,1611,191 +-- Entry 2: count: 100 hasNull: false min: bob davidson max: zzz sum: 1281 positions: 0,3246,373 + +-- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where s > "zzz" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + RECORDS_OUT_0: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where s = "zach young" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 4402 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + ALLOCATED_BYTES: 786432 + ALLOCATED_USED_BYTES: 11299 + CACHE_HIT_BYTES: 0 + CACHE_MISS_BYTES: 3980 + METADATA_CACHE_HIT: 1 + METADATA_CACHE_MISS: 1 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +2 +PREHOOK: query: select count(*) from orc_ppd where s <=> "zach zipper" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 22574 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 5 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 1000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: select count(*) from orc_ppd where s <=> "" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 22574 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 5 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 1000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: -- INPUT_RECORDS: 0 +select count(*) from orc_ppd where s is null +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + RECORDS_OUT_0: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 2100 +select count(*) from orc_ppd where s is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 104 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 
+Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +2100 +PREHOOK: query: -- INPUT_RECORDS: 0 +select count(*) from orc_ppd where s = cast("zach young" as char(50)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 1100 + SELECTED_ROWGROUPS: 2 +0 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where s = cast("zach young" as char(10)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +2 +PREHOOK: query: select count(*) from orc_ppd where s = cast("zach young" as varchar(10)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +2 +PREHOOK: query: select count(*) from orc_ppd where s = cast("zach young" as varchar(50)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +2 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where s < "b" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 2000 + 
SELECTED_ROWGROUPS: 2 +81 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where s > "alice" and s < "bob" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 2000 + SELECTED_ROWGROUPS: 2 +74 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where s in ("alice allen", "") +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 2000 + SELECTED_ROWGROUPS: 2 +12 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where s between "" and "alice allen" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 2000 + SELECTED_ROWGROUPS: 2 +13 +PREHOOK: query: -- INPUT_RECORDS: 100 (1 row group) +select count(*) from orc_ppd where s between "zz" and "zzz" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 100 + SELECTED_ROWGROUPS: 1 +1 +PREHOOK: query: -- INPUT_RECORDS: 1100 (2 row groups) +select count(*) from orc_ppd where s between "zach zipper" and "zzz" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 1100 + SELECTED_ROWGROUPS: 2 +7 +PREHOOK: query: -- bloom filter tests +-- INPUT_RECORDS: 0 +select 
count(*) from orc_ppd where s = "hello world" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +0 +PREHOOK: query: select count(*) from orc_ppd where s <=> "apache hive" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 18594 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 5 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +0 +PREHOOK: query: select count(*) from orc_ppd where s IN ("a", "z") +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +0 +PREHOOK: query: -- INPUT_RECORDS: 100 +select count(*) from orc_ppd where s = "sarah ovid" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +2 +PREHOOK: query: -- INPUT_RECORDS: 1100 +select count(*) from orc_ppd where s = "wendy king" +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 3980 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +6 +PREHOOK: query: -- INPUT_RECORDS: 1000 +select count(*) from orc_ppd where s = "wendy king" and t < 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + 
CACHE_HIT_BYTES: 4229 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 1000 + SELECTED_ROWGROUPS: 1 +2 +PREHOOK: query: -- INPUT_RECORDS: 100 +select count(*) from orc_ppd where s = "wendy king" and t > 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 4229 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 2 + NUM_VECTOR_BATCHES: 2 + ROWS_EMITTED: 1100 + SELECTED_ROWGROUPS: 2 +2 diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_counters1.q.out b/ql/src/test/results/clientpositive/llap/orc_llap_counters1.q.out new file mode 100644 index 0000000..75fd0e1 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_llap_counters1.q.out @@ -0,0 +1,331 @@ +PREHOOK: query: CREATE TABLE staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@staging +POSTHOOK: query: CREATE TABLE staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@staging +PREHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_ppd_staging +PREHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s 
as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@orc_ppd_staging +POSTHOOK: Lineage: orc_ppd_staging.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +PREHOOK: query: -- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values +-- which makes it hard to test bloom filters +insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: -- just to introduce a gap in min/max range for bloom filters. 
The dataset has contiguous values +-- which makes it hard to test bloom filters +insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@orc_ppd_staging +POSTHOOK: Lineage: orc_ppd_staging.b EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.bin EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.d EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.dec EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.f EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.i EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [] +PREHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@orc_ppd_staging +POSTHOOK: Lineage: orc_ppd_staging.b SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.bin EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.d SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.dec EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.f EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [] +PREHOOK: query: CREATE TABLE orc_ppd(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_ppd +POSTHOOK: query: CREATE TABLE orc_ppd(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_ppd +PREHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd_staging +PREHOOK: Output: default@orc_ppd +POSTHOOK: query: insert overwrite table orc_ppd 
select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_ppd_staging +POSTHOOK: Output: default@orc_ppd +POSTHOOK: Lineage: orc_ppd.b SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.bin SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: orc_ppd.bo SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: orc_ppd.c EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd.d SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: orc_ppd.da EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd.dec SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Lineage: orc_ppd.f SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: orc_ppd.i SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_ppd.s SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd.si SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.t SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.ts SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd.v EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] +PREHOOK: query: describe formatted orc_ppd +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_ppd +POSTHOOK: query: describe formatted orc_ppd +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_ppd +# col_name data_type comment + +t tinyint +si smallint +i int +b bigint +f float +d double +bo boolean +s string +c char(50) +v varchar(50) +da date +ts timestamp +dec decimal(4,2) +bin binary + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 2100 + orc.bloom.filter.columns * + orc.row.index.stride 1000 + rawDataSize 1223514 + totalSize 60010 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Row group statistics for column t: +-- Entry 0: count: 994 hasNull: true min: -10 max: 54 sum: 26014 positions: 0,0,0,0,0,0,0 +-- Entry 1: count: 1000 hasNull: false min: 54 max: 118 sum: 86812 positions: 0,2,124,0,0,116,11 +-- Entry 2: count: 100 hasNull: false min: 118 max: 127 sum: 12151 positions: 0,4,119,0,0,244,19 + +-- INPUT_RECORDS: 2100 (all row groups) +select count(*) from orc_ppd where t > -100 +PREHOOK: type: 
QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 17046 + HDFS_BYTES_WRITTEN: 104 + HDFS_READ_OPS: 6 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + ALLOCATED_BYTES: 524288 + ALLOCATED_USED_BYTES: 269 + CACHE_MISS_BYTES: 249 + METADATA_CACHE_MISS: 2 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +2094 +PREHOOK: query: -- 100% LLAP cache hit +select count(*) from orc_ppd where t > -100 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 104 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 0 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 249 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 +2094 +PREHOOK: query: DROP TABLE staging +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@staging +PREHOOK: Output: default@staging +PREHOOK: query: DROP TABLE orc_ppd_staging +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@orc_ppd_staging +PREHOOK: Output: default@orc_ppd_staging +PREHOOK: query: DROP TABLE orc_ppd +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd
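Note on the counter expectations above: with orc.row.index.stride = 1000, the 2100-row orc_ppd file splits into three row groups (1000, 1000, and 100 rows), each carrying min/max statistics in the row index and, because orc.bloom.filter.columns = "*", a bloom filter per column. A predicate skips a row group either when its literal falls outside the group's min/max range or when the bloom filter reports the literal as definitely absent; SELECTED_ROWGROUPS counts the groups that survive. The paired "100% LLAP cache hit" run shows the same query flipping from CACHE_MISS_BYTES / METADATA_CACHE_MISS on the first execution to CACHE_HIT_BYTES / METADATA_CACHE_HIT with HDFS_BYTES_READ: 0 on the second, once the data and metadata are resident in the LLAP cache. The following self-contained Java sketch mimics only the min/max elimination step for the column-t statistics quoted in the query comments; the class and method names are hypothetical and do not mirror Hive's or ORC's internal APIs, and the probabilistic bloom-filter check is deliberately left out.

    import java.util.List;

    // Minimal sketch of ORC row-group elimination by min/max statistics,
    // assuming the column-t row index entries quoted in the golden output
    // (stride = 1000, 2100 rows total). Names here are illustrative only.
    public class RowGroupPruningSketch {
      record RowGroup(int rows, long min, long max) {
        // Could a row with t == literal live in this group?
        boolean mightContainEquals(long literal) {
          return literal >= min && literal <= max;
        }
        // Could a row with t > literal live in this group?
        boolean mightContainGreaterThan(long literal) {
          return max > literal;
        }
      }

      public static void main(String[] args) {
        List<RowGroup> groups = List.of(
            new RowGroup(1000, -10, 54),   // Entry 0
            new RowGroup(1000, 54, 118),   // Entry 1
            new RowGroup(100, 118, 127));  // Entry 2

        // t = 55 -> only Entry 1 survives: SELECTED_ROWGROUPS: 1
        System.out.println("t = 55   -> " +
            groups.stream().filter(g -> g.mightContainEquals(55)).count());
        // t > 127 -> no group survives, so no row-group data is read at all
        System.out.println("t > 127  -> " +
            groups.stream().filter(g -> g.mightContainGreaterThan(127)).count());
        // t = -100 -> below every group's min; bloom filters never consulted
        System.out.println("t = -100 -> " +
            groups.stream().filter(g -> g.mightContainEquals(-100)).count());
      }
    }

Running the sketch prints 1, 0, and 0, matching the SELECTED_ROWGROUPS values (or the absence of any LLAP IO counters when nothing is read) in the corresponding queries above. A real bloom filter adds a second check on top of this range test: t = 125 falls inside Entry 2's [118, 127] range, so only a negative bloom lookup could have skipped that group, which is why the t = 125 query still shows SELECTED_ROWGROUPS: 1 and ROWS_EMITTED: 100 before the count comes back 0.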