diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index e540d023bd..9eb76e796d 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -4266,6 +4266,10 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "If the query results cache is enabled. This will keep results of previously executed queries " + "to be reused if the same query is executed again."), + HIVE_QUERY_RESULTS_CACHE_NONTRANSACTIONAL_TABLES_ENABLED("hive.query.results.cache.nontransactional.tables.enabled", false, + "If the query results cache is enabled for queries involving non-transactional tables. " + + "Users who enable this setting should be willing to tolerate some amount of stale results in the cache."), + HIVE_QUERY_RESULTS_CACHE_WAIT_FOR_PENDING_RESULTS("hive.query.results.cache.wait.for.pending.results", true, "Should a query wait for the pending results of an already running query, " + "in order to use the cached result when it becomes ready"), diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 48d62a8bf9..b8d46a2b04 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -244,6 +244,8 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ ptf_streaming.q,\ results_cache_1.q,\ results_cache_empty_result.q,\ + results_cache_invalidation.q,\ + results_cache_transactional.q,\ sample1.q,\ selectDistinctStar.q,\ select_dummy_source.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java index a88453c978..4acdd9b49b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -1977,9 +1977,14 @@ private void postExecutionCacheActions() 
throws Exception { PerfLogger perfLogger = SessionState.getPerfLogger(); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SAVE_TO_RESULTS_CACHE); + ValidTxnWriteIdList txnWriteIdList = null; + if (plan.hasAcidResourcesInQuery()) { + txnWriteIdList = AcidUtils.getValidTxnWriteIdList(conf); + } boolean savedToCache = QueryResultsCache.getInstance().setEntryValid( cacheUsage.getCacheEntry(), - plan.getFetchTask().getWork()); + plan.getFetchTask().getWork(), + txnWriteIdList); LOG.info("savedToCache: {}", savedToCache); if (savedToCache) { useFetchFromCache(cacheUsage.getCacheEntry()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java b/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java index b1a3646624..90c8ec3cae 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java @@ -43,6 +43,7 @@ import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.function.Supplier; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileSystem; @@ -52,17 +53,21 @@ import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; import org.apache.hadoop.hive.common.metrics.common.MetricsFactory; import org.apache.hadoop.hive.common.metrics.common.MetricsVariable; +import org.apache.hadoop.hive.common.ValidTxnWriteIdList; +import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.Entity.Type; import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient; import org.apache.hadoop.hive.ql.metadata.Table; 
import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo; import org.apache.hadoop.hive.ql.parse.TableAccessInfo; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.HiveOperation; +import org.apache.hive.common.util.TxnIdUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -76,10 +81,12 @@ public static class LookupInfo { private String queryText; + private Supplier txnWriteIdListProvider; - public LookupInfo(String queryText) { + public LookupInfo(String queryText, Supplier txnWriteIdListProvider) { super(); this.queryText = queryText; + this.txnWriteIdListProvider = txnWriteIdListProvider; } public String getQueryText() { @@ -174,6 +181,7 @@ public void setInputs(Set inputs) { private AtomicInteger readers = new AtomicInteger(0); private ScheduledFuture invalidationFuture = null; private volatile CacheEntryStatus status = CacheEntryStatus.PENDING; + private ValidTxnWriteIdList txnWriteIdList; public void releaseReader() { int readerCount = 0; @@ -389,15 +397,20 @@ public CacheEntry lookup(LookupInfo request) { LOG.debug("QueryResultsCache lookup for query: {}", request.queryText); - boolean foundPending = false; + boolean foundPending = false; + // Cannot remove entries while we currently hold read lock, so keep track of them to delete later. + Set entriesToRemove = new HashSet(); Lock readLock = rwLock.readLock(); try { + // Note: ReentrantReadWriteLock does not allow upgrading a read lock to a write lock. + // Care must be taken while under read lock, to make sure we do not perform any actions + // which attempt to take a write lock. 
readLock.lock(); Set candidates = queryMap.get(request.queryText); if (candidates != null) { CacheEntry pendingResult = null; for (CacheEntry candidate : candidates) { - if (entryMatches(request, candidate)) { + if (entryMatches(request, candidate, entriesToRemove)) { CacheEntryStatus entryStatus = candidate.status; if (entryStatus == CacheEntryStatus.VALID) { result = candidate; @@ -422,6 +435,11 @@ public CacheEntry lookup(LookupInfo request) { readLock.unlock(); } + // Now that we have exited read lock it is safe to remove any invalid entries. + for (CacheEntry invalidEntry : entriesToRemove) { + removeEntry(invalidEntry); + } + LOG.debug("QueryResultsCache lookup result: {}", result); incrementMetric(MetricsConstant.QC_LOOKUPS); if (result != null) { @@ -477,7 +495,7 @@ public CacheEntry addToCache(QueryInfo queryInfo) { * @param fetchWork * @return */ - public boolean setEntryValid(CacheEntry cacheEntry, FetchWork fetchWork) { + public boolean setEntryValid(CacheEntry cacheEntry, FetchWork fetchWork, ValidTxnWriteIdList txnWriteIdList) { String queryText = cacheEntry.getQueryText(); boolean dataDirMoved = false; Path queryResultsPath = null; @@ -527,6 +545,7 @@ public boolean setEntryValid(CacheEntry cacheEntry, FetchWork fetchWork) { cacheEntry.size = resultSize; this.cacheSize += resultSize; cacheEntry.createTime = System.currentTimeMillis(); + cacheEntry.txnWriteIdList = txnWriteIdList; cacheEntry.setStatus(CacheEntryStatus.VALID); // Mark this entry as being in use. Caller will need to release later. @@ -601,7 +620,15 @@ public long getSize() { private static final float LRU_LOAD_FACTOR = 0.75f; private static final CacheEntry[] EMPTY_CACHEENTRY_ARRAY = {}; - private boolean entryMatches(LookupInfo lookupInfo, CacheEntry entry) { + /** + * Check that the cache entry matches the lookupInfo. + * @param lookupInfo + * @param entry + * @param entriesToRemove Set of entries to be removed after exiting read lock section. 
+ * If the entry is found to be invalid it will be added to this set. + * @return + */ + private boolean entryMatches(LookupInfo lookupInfo, CacheEntry entry, Set entriesToRemove) { QueryInfo queryInfo = entry.getQueryInfo(); for (ReadEntity readEntity : queryInfo.getInputs()) { // Check that the tables used do not resolve to temp tables. @@ -614,6 +641,34 @@ private boolean entryMatches(LookupInfo lookupInfo, CacheEntry entry) { tableUsed.getTableName()); return false; } + + // Has the table changed since the query was cached? + // For transactional tables, can compare the table writeIDs of the current/cached query. + if (AcidUtils.isTransactionalTable(tableUsed)) { + boolean writeIdCheckPassed = false; + String tableName = tableUsed.getFullyQualifiedName(); + ValidTxnWriteIdList currentTxnWriteIdList = lookupInfo.txnWriteIdListProvider.get(); + ValidWriteIdList currentWriteIdForTable = + currentTxnWriteIdList.getTableValidWriteIdList(tableName); + ValidWriteIdList cachedWriteIdForTable = entry.txnWriteIdList.getTableValidWriteIdList(tableName); + + LOG.debug("Checking writeIds for table {}: currentWriteIdForTable {}, cachedWriteIdForTable {}", + tableName, currentWriteIdForTable, cachedWriteIdForTable); + if (currentWriteIdForTable != null && cachedWriteIdForTable != null) { + if (TxnIdUtils.checkEquivalentWriteIds(currentWriteIdForTable, cachedWriteIdForTable)) { + writeIdCheckPassed = true; + } + } + + if (!writeIdCheckPassed) { + LOG.debug("Cached query no longer valid due to table {}", tableUsed.getFullyQualifiedName()); + // We can invalidate the entry now, but calling removeEntry() requires a write lock + // and we may already have read lock taken now. Add to entriesToRemove to delete later. 
+ entriesToRemove.add(entry); + entry.invalidate(); + return false; + } + } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index 44a7496136..2b1960c087 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -27,6 +27,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.common.HiveStatsUtils; +import org.apache.hadoop.hive.common.JavaUtils; +import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidTxnWriteIdList; import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; @@ -41,6 +43,8 @@ import org.apache.hadoop.hive.ql.io.orc.OrcRecordUpdater; import org.apache.hadoop.hive.ql.io.orc.Reader; import org.apache.hadoop.hive.ql.io.orc.Writer; +import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; +import org.apache.hadoop.hive.ql.lockmgr.LockException; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.plan.CreateTableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; @@ -1508,11 +1512,19 @@ public static boolean isRemovedInsertOnlyTable(Set removedSet) { } /** - * Extract the ValidWriteIdList for the given table from the list of tables' ValidWriteIdList. + * Get the ValidTxnWriteIdList saved in the configuration. */ - public static ValidWriteIdList getTableValidWriteIdList(Configuration conf, String fullTableName) { + public static ValidTxnWriteIdList getValidTxnWriteIdList(Configuration conf) { String txnString = conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY); ValidTxnWriteIdList validTxnList = new ValidTxnWriteIdList(txnString); + return validTxnList; + } + + /** + * Extract the ValidWriteIdList for the given table from the list of tables' ValidWriteIdList. 
+ */ + public static ValidWriteIdList getTableValidWriteIdList(Configuration conf, String fullTableName) { + ValidTxnWriteIdList validTxnList = getValidTxnWriteIdList(conf); return validTxnList.getTableValidWriteIdList(fullTableName); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 10982ddbd1..79f58eec20 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -40,8 +40,10 @@ import java.util.Set; import java.util.TreeSet; import java.util.UUID; +import java.util.function.Supplier; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; +import java.util.stream.Collectors; import org.antlr.runtime.ClassicToken; import org.antlr.runtime.CommonToken; @@ -63,6 +65,8 @@ import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.common.ValidTxnList; +import org.apache.hadoop.hive.common.ValidTxnWriteIdList; import org.apache.hadoop.hive.common.StatsSetupConst.StatDB; import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; import org.apache.hadoop.hive.conf.HiveConf; @@ -117,6 +121,7 @@ import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.hooks.Entity; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity.WriteType; @@ -14522,7 +14527,33 @@ private String getQueryStringForCache(ASTNode ast) { QueryResultsCache.LookupInfo lookupInfo = null; String queryString = getQueryStringForCache(astNode); if (queryString != null) { - lookupInfo = new QueryResultsCache.LookupInfo(queryString); + 
lookupInfo = new QueryResultsCache.LookupInfo(queryString, + new Supplier() { + ValidTxnWriteIdList cachedWriteIdList = null; + @Override + public ValidTxnWriteIdList get() { + if (cachedWriteIdList == null) { + // TODO: Once HIVE-18948 is in, should be able to retrieve writeIdList from the conf. + //cachedWriteIdList = AcidUtils.getValidTxnWriteIdList(conf); + // + List transactionalTables = tablesFromReadEntities(inputs) + .stream() + .filter(table -> AcidUtils.isTransactionalTable(table)) + .map(table -> table.getFullyQualifiedName()) + .collect(Collectors.toList()); + try { + String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY); + cachedWriteIdList = + getTxnMgr().getValidWriteIds(transactionalTables, txnString); + } catch (Exception err) { + String msg = "Error while getting the txnWriteIdList for tables " + transactionalTables + + " and validTxnList " + conf.get(ValidTxnList.VALID_TXNS_KEY); + throw new RuntimeException(msg, err); + } + } + return cachedWriteIdList; + } + }); } return lookupInfo; } @@ -14620,9 +14651,31 @@ private boolean queryCanBeCached() { return false; } + if (!conf.getBoolVar(ConfVars.HIVE_QUERY_RESULTS_CACHE_NONTRANSACTIONAL_TABLES_ENABLED)) { + List nonTransactionalTables = getNonTransactionalTables(); + if (nonTransactionalTables.size() > 0) { + LOG.info("Not eligible for results caching - query contains non-transactional tables {}", + nonTransactionalTables); + return false; + } + } return true; } + private static Set
tablesFromReadEntities(Set readEntities) { + return readEntities.stream() + .filter(entity -> entity.getType() == Entity.Type.TABLE) + .map(entity -> entity.getTable()) + .collect(Collectors.toSet()); + } + + private List
getNonTransactionalTables() { + return tablesFromReadEntities(inputs) + .stream() + .filter(table -> !AcidUtils.isTransactionalTable(table)) + .collect(Collectors.toList()); + } + /** * Check the query results cache to see if the query represented by the lookupInfo can be * answered using the results cache. If the cache contains a suitable entry, the semantic analyzer diff --git a/ql/src/test/queries/clientpositive/results_cache_1.q b/ql/src/test/queries/clientpositive/results_cache_1.q index 4aea60e1e5..0c85c4ae54 100644 --- a/ql/src/test/queries/clientpositive/results_cache_1.q +++ b/ql/src/test/queries/clientpositive/results_cache_1.q @@ -1,5 +1,6 @@ set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; explain select count(*) from src a join src b on (a.key = b.key); diff --git a/ql/src/test/queries/clientpositive/results_cache_2.q b/ql/src/test/queries/clientpositive/results_cache_2.q index 96a90925f6..034ec184e6 100644 --- a/ql/src/test/queries/clientpositive/results_cache_2.q +++ b/ql/src/test/queries/clientpositive/results_cache_2.q @@ -1,5 +1,6 @@ set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; set hive.fetch.task.conversion=more; -- Test 1: fetch task diff --git a/ql/src/test/queries/clientpositive/results_cache_capacity.q b/ql/src/test/queries/clientpositive/results_cache_capacity.q index 9f54577009..eeb11e1d88 100644 --- a/ql/src/test/queries/clientpositive/results_cache_capacity.q +++ b/ql/src/test/queries/clientpositive/results_cache_capacity.q @@ -1,5 +1,6 @@ set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; -- Allow results cache to hold entries up to 125 bytes -- The single row queries are small enough to fit in the cache (103 bytes) diff --git a/ql/src/test/queries/clientpositive/results_cache_empty_result.q 
b/ql/src/test/queries/clientpositive/results_cache_empty_result.q index 621367141e..f5b99b51e0 100644 --- a/ql/src/test/queries/clientpositive/results_cache_empty_result.q +++ b/ql/src/test/queries/clientpositive/results_cache_empty_result.q @@ -1,5 +1,6 @@ set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; explain select count(*), key from src a where key < 0 group by key; diff --git a/ql/src/test/queries/clientpositive/results_cache_invalidation.q b/ql/src/test/queries/clientpositive/results_cache_invalidation.q new file mode 100644 index 0000000000..b69bdf2159 --- /dev/null +++ b/ql/src/test/queries/clientpositive/results_cache_invalidation.q @@ -0,0 +1,89 @@ + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true'); +create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true'); + +insert into tab1 select * from default.src; +insert into tab2 select * from default.src; + +set hive.query.results.cache.enabled=true; + +set test.comment="Run queries to load into cache"; +set test.comment; + +-- Q1 +explain +select count(*) from tab1 a where key >= 0; +select count(*) from tab1 a where key >= 0; + +-- Q2 +explain +select max(key) from tab2; +select max(key) from tab2; + +-- Q3 +explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); + +set test.comment="Q1 should now be able to use cache"; +set test.comment; +explain +select count(*) from tab1 a where key >= 0; +select count(*) from tab1 a where key >= 0; + +set test.comment="Q2 should now be able to use cache"; +set test.comment; +explain +select max(key) from tab2; +select max(key) from tab2; + +set test.comment="Q3 should now be able to use cache"; +set test.comment; +explain +select count(*) 
from tab1 join tab2 on (tab1.key = tab2.key); +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); + +-- Update tab1 which should invalidate Q1 and Q3. +insert into tab1 values ('88', 'val_88'); + +set test.comment="Q1 should not use cache"; +set test.comment; +explain +select count(*) from tab1 a where key >= 0; +select count(*) from tab1 a where key >= 0; + +set test.comment="Q2 should still use cache since tab2 not updated"; +set test.comment; +explain +select max(key) from tab2; +select max(key) from tab2; + +set test.comment="Q3 should not use cache"; +set test.comment; +explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); + +-- Update tab2 which should invalidate Q2 and Q3. +insert into tab2 values ('88', 'val_88'); + +set test.comment="Q1 should use cache"; +set test.comment; +explain +select count(*) from tab1 a where key >= 0; +select count(*) from tab1 a where key >= 0; + +set test.comment="Q2 should not use cache"; +set test.comment; +explain +select max(key) from tab2; +select max(key) from tab2; + +set test.comment="Q3 should not use cache"; +set test.comment; +explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); diff --git a/ql/src/test/queries/clientpositive/results_cache_lifetime.q b/ql/src/test/queries/clientpositive/results_cache_lifetime.q index 60ffe96a04..1c306e14ab 100644 --- a/ql/src/test/queries/clientpositive/results_cache_lifetime.q +++ b/ql/src/test/queries/clientpositive/results_cache_lifetime.q @@ -1,5 +1,6 @@ set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; set hive.query.results.cache.max.entry.lifetime=2; -- This query used the cache from results_cache_1.q. Load it up. 
diff --git a/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q b/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q index 4802f43ba9..c5684f0a34 100644 --- a/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q +++ b/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q @@ -9,6 +9,7 @@ create table quoted1 ( insert into quoted1 select key, key, value, value from src; set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; explain select max(`_c1`), max(`int`), max(`col 3`), max(`col``4`) from quoted1; diff --git a/ql/src/test/queries/clientpositive/results_cache_temptable.q b/ql/src/test/queries/clientpositive/results_cache_temptable.q index 9e0de765cb..7e16702c48 100644 --- a/ql/src/test/queries/clientpositive/results_cache_temptable.q +++ b/ql/src/test/queries/clientpositive/results_cache_temptable.q @@ -1,4 +1,5 @@ set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; create table rct (key string, value string); load data local inpath '../../data/files/kv1.txt' overwrite into table rct; diff --git a/ql/src/test/queries/clientpositive/results_cache_transactional.q b/ql/src/test/queries/clientpositive/results_cache_transactional.q new file mode 100644 index 0000000000..9181c6db63 --- /dev/null +++ b/ql/src/test/queries/clientpositive/results_cache_transactional.q @@ -0,0 +1,56 @@ + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true'); +create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true'); + +insert into tab1 select * from default.src; +insert into tab2 select * from default.src; + +set hive.query.results.cache.enabled=true; +set 
hive.query.results.cache.nontransactional.tables.enabled=false; + +explain +select max(key) from tab1; +select max(key) from tab1; + +set test.comment="Query on transactional table should use cache"; +set test.comment; +explain +select max(key) from tab1; +select max(key) from tab1; + +explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); + +set test.comment="Join on transactional tables, should use cache"; +set test.comment; +explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); + + +-- Non-transactional tables + +explain +select max(key) from src; +select max(key) from src; + +set test.comment="Query on non-transactional table should not use cache"; +set test.comment; +explain +select max(key) from src; +select max(key) from src; + +explain +select count(*) from tab1 join src on (tab1.key = src.key); +select count(*) from tab1 join src on (tab1.key = src.key); + +set test.comment="Join uses non-transactional table, should not use cache"; +set test.comment; +explain +select count(*) from tab1 join src on (tab1.key = src.key); +select count(*) from tab1 join src on (tab1.key = src.key); + diff --git a/ql/src/test/queries/clientpositive/results_cache_with_masking.q b/ql/src/test/queries/clientpositive/results_cache_with_masking.q index b4fcdd57eb..d353598a16 100644 --- a/ql/src/test/queries/clientpositive/results_cache_with_masking.q +++ b/ql/src/test/queries/clientpositive/results_cache_with_masking.q @@ -3,6 +3,7 @@ set hive.mapred.mode=nonstrict; set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; create table masking_test as select cast(key as int) as key, value from src; diff --git 
a/ql/src/test/results/clientpositive/llap/results_cache_invalidation.q.out b/ql/src/test/results/clientpositive/llap/results_cache_invalidation.q.out new file mode 100644 index 0000000000..c76de92122 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/results_cache_invalidation.q.out @@ -0,0 +1,793 @@ +PREHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab1 +POSTHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab1 +PREHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab2 +POSTHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab2 +PREHOOK: query: insert into tab1 select * from default.src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tab1 +POSTHOOK: query: insert into tab1 select * from default.src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tab1 +POSTHOOK: Lineage: tab1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tab1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into tab2 select * from default.src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tab2 +POSTHOOK: query: insert into tab2 select * from default.src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tab2 +POSTHOOK: Lineage: tab2.key SIMPLE 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tab2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +test.comment="Run queries to load into cache" +PREHOOK: query: explain +select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) >= 0.0D) (type: boolean) + Statistics: Num rows: 30 Data size: 5338 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 30 Data size: 5338 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +#### A masked pattern was here #### +500 +PREHOOK: query: explain +select max(key) from tab2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab2 + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(key) from tab2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select max(key) from tab2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +98 +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: tab2 + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 87 Data size: 15480 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 95 Data size: 17028 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +1028 +test.comment="Q1 should now be able to use cache" +PREHOOK: query: explain +select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +POSTHOOK: query: select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +500 +test.comment="Q2 should now be able to use cache" +PREHOOK: query: explain +select max(key) from tab2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select max(key) from tab2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab2 +POSTHOOK: query: select max(key) from tab2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab2 +98 +test.comment="Q3 should now be able to use cache" +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: default@tab2 +POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: default@tab2 +1028 +PREHOOK: query: insert into tab1 values ('88', 'val_88') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tab1 +POSTHOOK: query: insert into tab1 values ('88', 'val_88') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tab1 +POSTHOOK: Lineage: tab1.key SCRIPT [] +POSTHOOK: Lineage: tab1.value SCRIPT [] +test.comment="Q1 should not use cache" +PREHOOK: query: explain +select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) >= 0.0D) (type: boolean) + Statistics: Num rows: 36 Data size: 6383 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 36 Data size: 6383 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: 
NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +#### A masked pattern was here #### +501 +test.comment="Q2 should still use cache since tab2 not updated" +PREHOOK: query: explain +select max(key) from tab2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select max(key) from tab2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab2 +POSTHOOK: query: select max(key) from tab2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab2 +98 +test.comment="Q3 should not use cache" +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num rows: 110 Data size: 19504 Basic stats: 
COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: tab2 + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 115 Data size: 20478 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + 
Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +1028 +PREHOOK: query: insert into tab2 values ('88', 'val_88') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tab2 +POSTHOOK: query: insert into tab2 values ('88', 'val_88') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tab2 +POSTHOOK: Lineage: tab2.key SCRIPT [] +POSTHOOK: Lineage: tab2.value SCRIPT [] +test.comment="Q1 should use cache" +PREHOOK: query: explain +select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +POSTHOOK: query: select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@tab1 +501 +test.comment="Q2 should not use cache" +PREHOOK: query: explain +select max(key) from tab2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab2 + Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(key) from tab2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select max(key) from tab2 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +98 +test.comment="Q3 should not use cache" +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: tab2 + Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + 
Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 115 Data size: 20478 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +1029 diff --git a/ql/src/test/results/clientpositive/llap/results_cache_transactional.q.out b/ql/src/test/results/clientpositive/llap/results_cache_transactional.q.out new file mode 100644 index 0000000000..a38ddfa3ea 
--- /dev/null +++ b/ql/src/test/results/clientpositive/llap/results_cache_transactional.q.out @@ -0,0 +1,624 @@ +PREHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab1 +POSTHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab1 +PREHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab2 +POSTHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab2 +PREHOOK: query: insert into tab1 select * from default.src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tab1 +POSTHOOK: query: insert into tab1 select * from default.src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tab1 +POSTHOOK: Lineage: tab1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tab1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into tab2 select * from default.src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tab2 +POSTHOOK: query: insert into tab2 select * from default.src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tab2 +POSTHOOK: Lineage: tab2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tab2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain +select max(key) from tab1 
+PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(key) from tab1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +#### A masked pattern was here #### +POSTHOOK: query: select max(key) from tab1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +#### A masked pattern was here #### +98 +test.comment="Query on 
transactional table should use cache" +PREHOOK: query: explain +select max(key) from tab1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select max(key) from tab1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +POSTHOOK: query: select max(key) from tab1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +98 +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: tab2 + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key 
is not null (type: boolean) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 95 Data size: 17028 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### 
+POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +1028 +test.comment="Join on transactional tables, should use cache" +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: default@tab2 +POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: default@tab2 +1028 +PREHOOK: query: explain +select max(key) from src +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: 
COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(key) from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select max(key) from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 +test.comment="Query on non-transactional table should not use cache" +PREHOOK: query: explain +select max(key) from src +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce 
Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(key) from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select max(key) from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 +PREHOOK: query: explain +select count(*) from tab1 join src on (tab1.key = src.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join src on (tab1.key = src.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: 
string) + outputColumnNames: _col0 + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: 
false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 join src on (tab1.key = src.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@tab1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 join src on (tab1.key = src.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@tab1 +#### A masked pattern was here #### +1028 +test.comment="Join uses non-transactional table, should not use cache" +PREHOOK: query: explain +select count(*) from tab1 join src on (tab1.key = src.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join src on (tab1.key = src.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87 Data 
size: 15480 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 join src on (tab1.key = src.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@tab1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 join src on (tab1.key = src.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@tab1 +#### A masked pattern was here #### +1028 diff --git a/ql/src/test/results/clientpositive/results_cache_invalidation.q.out b/ql/src/test/results/clientpositive/results_cache_invalidation.q.out new file mode 100644 index 0000000000..5f225a1cb1 --- /dev/null +++ b/ql/src/test/results/clientpositive/results_cache_invalidation.q.out @@ -0,0 +1,748 @@ +PREHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab1 +POSTHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab1 +PREHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab2 +POSTHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab2 +PREHOOK: query: insert into tab1 select * from default.src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tab1 +POSTHOOK: query: insert into tab1 select * from default.src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tab1 +POSTHOOK: Lineage: tab1.key 
SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tab1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into tab2 select * from default.src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tab2 +POSTHOOK: query: insert into tab2 select * from default.src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tab2 +POSTHOOK: Lineage: tab2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tab2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +test.comment="Run queries to load into cache" +PREHOOK: query: explain +select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) >= 0.0D) (type: boolean) + Statistics: Num rows: 30 Data size: 11548 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 30 Data size: 11548 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +#### A masked pattern was here #### +500 +PREHOOK: query: explain +select max(key) from tab2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tab2 + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(key) from tab2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select max(key) from tab2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +98 +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: tab2 + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 100 Data size: 38533 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: 
default@tab2 +#### A masked pattern was here #### +1028 +test.comment="Q1 should now be able to use cache" +PREHOOK: query: explain +select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +#### A masked pattern was here #### +500 +test.comment="Q2 should now be able to use cache" +PREHOOK: query: explain +select max(key) from tab2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select max(key) from tab2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select max(key) from tab2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +98 +test.comment="Q3 should now be able to use cache" +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: 
default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +1028 +PREHOOK: query: insert into tab1 values ('88', 'val_88') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tab1 +POSTHOOK: query: insert into tab1 values ('88', 'val_88') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tab1 +POSTHOOK: Lineage: tab1.key SCRIPT [] +POSTHOOK: Lineage: tab1.value SCRIPT [] +test.comment="Q1 should not use cache" +PREHOOK: query: explain +select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) >= 0.0D) (type: boolean) + Statistics: Num rows: 36 Data size: 13840 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 36 Data size: 13840 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 
1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +#### A masked pattern was here #### +501 +test.comment="Q2 should still use cache since tab2 not updated" +PREHOOK: query: explain +select max(key) from tab2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select max(key) from tab2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select max(key) from tab2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +98 +test.comment="Q3 should not use cache" +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE 
Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: tab2 + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 121 Data size: 46519 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + 
mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +1028 +PREHOOK: query: insert into tab2 values ('88', 'val_88') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tab2 +POSTHOOK: query: insert into tab2 values ('88', 'val_88') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tab2 +POSTHOOK: Lineage: tab2.key SCRIPT [] +POSTHOOK: Lineage: tab2.value SCRIPT [] +test.comment="Q1 should use cache" +PREHOOK: query: explain +select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +#### A 
masked pattern was here #### +501 +test.comment="Q2 should not use cache" +PREHOOK: query: explain +select max(key) from tab2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tab2 + Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(key) from tab2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select max(key) from tab2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +98 +test.comment="Q3 should not use cache" +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) 
+PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: tab2 + Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 121 Data size: 46519 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + 
File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +1029 diff --git a/ql/src/test/results/clientpositive/results_cache_transactional.q.out b/ql/src/test/results/clientpositive/results_cache_transactional.q.out new file mode 100644 index 0000000000..f2fac38f7d --- /dev/null +++ b/ql/src/test/results/clientpositive/results_cache_transactional.q.out @@ -0,0 +1,583 @@ +PREHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default 
+PREHOOK: Output: default@tab1 +POSTHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab1 +PREHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab2 +POSTHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab2 +PREHOOK: query: insert into tab1 select * from default.src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tab1 +POSTHOOK: query: insert into tab1 select * from default.src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tab1 +POSTHOOK: Lineage: tab1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tab1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into tab2 select * from default.src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tab2 +POSTHOOK: query: insert into tab2 select * from default.src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tab2 +POSTHOOK: Lineage: tab2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tab2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain +select max(key) from tab1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num 
rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(key) from tab1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +#### A masked pattern was here #### +POSTHOOK: query: select max(key) from tab1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +#### A masked pattern was here #### +98 +test.comment="Query on transactional table should use cache" +PREHOOK: query: explain +select max(key) from tab1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select max(key) from tab1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +#### A masked pattern was here #### +POSTHOOK: query: select max(key) 
from tab1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +#### A masked pattern was here #### +98 +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: tab2 + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 100 Data size: 38533 Basic stats: COMPLETE 
Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +1028 +test.comment="Join on transactional tables, should use cache" +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + 
Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +1028 +PREHOOK: query: explain +select max(key) from src +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + 
Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(key) from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select max(key) from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 +test.comment="Query on non-transactional table should not use cache" +PREHOOK: query: explain +select max(key) from src +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(key) from src +PREHOOK: type: QUERY 
+PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select max(key) from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 +PREHOOK: query: explain +select count(*) from tab1 join src on (tab1.key = src.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join src on (tab1.key = src.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) 
+ 1 _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 join src on (tab1.key = src.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@tab1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 join src on (tab1.key = src.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@tab1 +#### A masked pattern was here #### +1028 +test.comment="Join uses non-transactional table, should not use cache" +PREHOOK: query: explain +select count(*) from tab1 join src on (tab1.key = src.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) 
from tab1 join src on (tab1.key = src.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 join src on (tab1.key = src.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@tab1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 join src on (tab1.key = src.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@tab1 +#### A masked pattern was here #### +1028