diff --git common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java index df77a4a2f2..09343e5616 100644 --- common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java +++ common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java @@ -54,17 +54,17 @@ * @return array of FileStatus * @throws IOException */ - public static FileStatus[] getFileStatusRecurse(Path path, int level, FileSystem fs) + public static List getFileStatusRecurse(Path path, int level, FileSystem fs) throws IOException { return getFileStatusRecurse(path, level, fs, FileUtils.HIDDEN_FILES_PATH_FILTER, false); } - public static FileStatus[] getFileStatusRecurse( + public static List getFileStatusRecurse( Path path, int level, FileSystem fs, PathFilter filter) throws IOException { return getFileStatusRecurse(path, level, fs, filter, false); } - public static FileStatus[] getFileStatusRecurse( + public static List getFileStatusRecurse( Path path, int level, FileSystem fs, PathFilter filter, boolean allLevelsBelow) throws IOException { @@ -79,9 +79,9 @@ // does not exist. But getFileStatus() throw IOException. To mimic the // similar behavior we will return empty array on exception. For external // tables, the path of the table will not exists during table creation - return new FileStatus[0]; + return new ArrayList<>(0); } - return result.toArray(new FileStatus[result.size()]); + return result; } // construct a path pattern (e.g., /*/*) to find all dynamically generated paths @@ -91,7 +91,7 @@ } Path pathPattern = new Path(path, sb.toString()); if (!allLevelsBelow) { - return fs.globStatus(pathPattern, filter); + return Lists.newArrayList(fs.globStatus(pathPattern, filter)); } LinkedList queue = new LinkedList<>(); List results = new ArrayList(); @@ -114,7 +114,7 @@ } } } - return results.toArray(new FileStatus[results.size()]); + return results; } public static int getNumBitVectorsForNDVEstimation(Configuration conf) throws Exception { diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index fb926eb23f..04b8c4b5af 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -575,6 +575,8 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal HIVE_IN_TEST("hive.in.test", false, "internal usage only, true in test mode", true), HIVE_IN_TEST_SSL("hive.in.ssl.test", false, "internal usage only, true in SSL test mode", true), + // TODO: this needs to be removed; see TestReplicationScenarios* comments. 
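// Illustrative sketch, not part of the patch: the List-returning getFileStatusRecurse()
// above wraps fs.globStatus() with Lists.newArrayList(), which throws if globStatus()
// returns null (it can, when the non-glob portion of the pattern does not exist).
// A null-safe caller could normalize first (assumes org.apache.hadoop.fs.* and Guava Lists):
static List<FileStatus> globSafely(FileSystem fs, Path pathPattern, PathFilter filter)
    throws IOException {
  FileStatus[] matched = fs.globStatus(pathPattern, filter);
  // Treat a null result the same as an empty match, mirroring the empty-list contract above.
  return matched == null ? new ArrayList<FileStatus>() : Lists.newArrayList(matched);
}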
+ HIVE_IN_TEST_REPL("hive.in.repl.test", false, "internal usage only, true in replication test mode", true), HIVE_IN_TEST_IDE("hive.in.ide.test", false, "internal usage only, true if test running in ide", true), HIVE_TESTING_SHORT_LOGS("hive.testing.short.logs", false, diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java index 41c89b1cd3..c383a53081 100644 --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.metastore.ObjectStore; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.ForeignKeysRequest; +import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.NotNullConstraintsRequest; import org.apache.hadoop.hive.metastore.api.NotificationEvent; @@ -72,12 +73,14 @@ import org.slf4j.LoggerFactory; import javax.annotation.Nullable; + import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; + import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertEquals; @@ -146,6 +149,7 @@ public static void setUpBeforeClass() throws Exception { hconf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); hconf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); hconf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hconf.set(HiveConf.ConfVars.HIVE_IN_TEST_REPL.varname, "true"); hconf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); hconf.set(HiveConf.ConfVars.HIVE_TXN_MANAGER.varname, "org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager"); @@ -2774,7 +2778,7 @@ public void testIncrementalRepeatEventOnExistingObject() throws IOException { } @Test - public void testIncrementalRepeatEventOnMissingObject() throws IOException { + public void testIncrementalRepeatEventOnMissingObject() throws Exception { String testName = "incrementalRepeatEventOnMissingObject"; String dbName = createDB(testName, driver); run("CREATE TABLE " + dbName + ".unptned(a string) STORED AS TEXTFILE", driver); @@ -3211,10 +3215,13 @@ public void testRemoveStats() throws IOException { } @Test - public void testSkipTables() throws IOException { + public void testSkipTables() throws Exception { String testName = "skipTables"; String dbName = createDB(testName, driver); + // TODO: this is wrong; this test sets up dummy txn manager and so it cannot create ACID tables. + // If I change it to use proper txn manager, the setup for some tests hangs. + // This used to work by accident, now this works due a test flag. The test needs to be fixed. 
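// Illustrative sketch (assumed helper, not in the patch) of how the new hive.in.repl.test
// flag is wired: replication tests run under DummyTxnManager, and the flag is what lets
// transactional DDL pass checkAcidTxnManager() (see the SemanticAnalyzer hunk below).
static HiveConf newReplTestConf() {
  HiveConf hconf = new HiveConf();
  hconf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
  hconf.set(HiveConf.ConfVars.HIVE_TXN_MANAGER.varname,
      "org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager");
  // Test-only escape hatch introduced by this patch; flagged above as a TODO to remove.
  hconf.set(HiveConf.ConfVars.HIVE_IN_TEST_REPL.varname, "true");
  return hconf;
}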
// Create table run("CREATE TABLE " + dbName + ".acid_table (key int, value int) PARTITIONED BY (load_date date) " + "CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver); @@ -3651,14 +3658,10 @@ private void verifyIfTableNotExist(String dbName, String tableName, HiveMetaStor assertEquals(NoSuchObjectException.class, e.getClass()); } - private void verifyIfTableExist(String dbName, String tableName, HiveMetaStoreClient myClient){ - Exception e = null; - try { - Table tbl = myClient.getTable(dbName, tableName); - assertNotNull(tbl); - } catch (TException te) { - assert(false); - } + private void verifyIfTableExist( + String dbName, String tableName, HiveMetaStoreClient myClient) throws Exception { + Table tbl = myClient.getTable(dbName, tableName); + assertNotNull(tbl); } private void verifyIfPartitionNotExist(String dbName, String tableName, List partValues, diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java index 6e8d6b62a5..13b918df7d 100644 --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java @@ -73,6 +73,7 @@ public static void classLevelSetup() throws Exception { new MiniDFSCluster.Builder(conf).numDataNodes(1).format(true).build(); HashMap overridesForHiveConf = new HashMap() {{ put("fs.defaultFS", miniDFSCluster.getFileSystem().getUri().toString()); + put(HiveConf.ConfVars.HIVE_IN_TEST_REPL.varname, "true"); }}; primary = new WarehouseInstance(LOG, miniDFSCluster, overridesForHiveConf); replica = new WarehouseInstance(LOG, miniDFSCluster, overridesForHiveConf); @@ -398,6 +399,9 @@ public void testBootStrapDumpOfWarehouse() throws Throwable { .run("create table t1 (i int, j int)") .run("create database " + dbOne) .run("use " + dbOne) + // TODO: this is wrong; this test sets up dummy txn manager and so it cannot create ACID tables. + // This used to work by accident, now this works due a test flag. The test needs to be fixed. + // Also applies for a couple more tests. 
.run("create table t1 (i int, j int) partitioned by (load_date date) " + "clustered by(i) into 2 buckets stored as orc tblproperties ('transactional'='true') ") .run("create database " + dbTwo) diff --git ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java index 9ea7a7c79b..f53afaff2b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java +++ ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java @@ -36,6 +36,7 @@ import java.util.concurrent.ConcurrentHashMap; import com.google.common.annotations.VisibleForTesting; + import org.apache.hadoop.hive.metastore.api.Schema; import org.apache.hadoop.hive.ql.exec.ConditionalTask; import org.apache.hadoop.hive.ql.exec.ExplainTask; @@ -60,6 +61,8 @@ import org.apache.thrift.protocol.TBinaryProtocol; import org.apache.thrift.protocol.TJSONProtocol; import org.apache.thrift.transport.TMemoryBuffer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * QueryPlan can be serialized to disk so that we can restart/resume the @@ -149,6 +152,7 @@ public QueryPlan(String queryString, BaseSemanticAnalyzer sem, Long startTime, S this.acidResourcesInQuery = sem.hasTransactionalInQuery(); this.acidSinks = sem.getAcidFileSinks(); } + private static final Logger LOG = LoggerFactory.getLogger(QueryPlan.class); /** * @return true if any acid resources are read/written diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java index eee5e66ea7..ce683c8a8d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java @@ -62,7 +62,7 @@ public int execute(DriverContext driverContext) { protected int copyOnePath(Path fromPath, Path toPath) { FileSystem dstFs = null; try { - Utilities.FILE_OP_LOGGER./**/debug("Copying data from {} to {} " + fromPath); + Utilities.FILE_OP_LOGGER.trace("Copying data from {} to {} " + fromPath); console.printInfo("Copying data from " + fromPath.toString(), " to " + toPath.toString()); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index b490325091..7eba5e88d8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -687,7 +687,7 @@ private void checkFileFormats(Hive db, LoadTableDesc tbd, Table table) * has done it's job before the query ran. */ WriteEntity.WriteType getWriteType(LoadTableDesc tbd, AcidUtils.Operation operation) { - if (tbd.getLoadFileType() == LoadFileType.REPLACE_ALL) { + if (tbd.getLoadFileType() == LoadFileType.REPLACE_ALL || tbd.isInsertOverwrite()) { return WriteEntity.WriteType.INSERT_OVERWRITE; } switch (operation) { @@ -730,13 +730,13 @@ private void updatePartitionBucketSortColumns(Hive db, Table table, Partition pa // have the correct buckets. The existing code discards the inferred data when the // reducers don't produce enough files; we'll do the same for MM tables for now. FileSystem fileSys = partn.getDataLocation().getFileSystem(conf); - FileStatus[] fileStatus = HiveStatsUtils.getFileStatusRecurse( + List fileStatus = HiveStatsUtils.getFileStatusRecurse( partn.getDataLocation(), 1, fileSys); // Verify the number of buckets equals the number of files // This will not hold for dynamic partitions where not every reducer produced a file for // those partitions. In this case the table is not bucketed as Hive requires a files for // each bucket. 
- if (fileStatus.length == numBuckets) { + if (fileStatus.size() == numBuckets) { List newBucketCols = new ArrayList(); updateBucketCols = true; for (BucketCol bucketCol : bucketCols) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 804cd7868b..5fbe045df5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -1493,8 +1493,9 @@ public static void mvFileToFinalPath(Path specPath, Configuration hconf, } // Remove duplicates from tmpPath - FileStatus[] statuses = HiveStatsUtils.getFileStatusRecurse( + List statusList = HiveStatsUtils.getFileStatusRecurse( tmpPath, ((dpCtx == null) ? 1 : dpCtx.getNumDPCols()), fs); + FileStatus[] statuses = statusList.toArray(new FileStatus[statusList.size()]); if(statuses != null && statuses.length > 0) { PerfLogger perfLogger = SessionState.getPerfLogger(); Set filesKept = new HashSet(); @@ -1601,8 +1602,9 @@ public static void removeTempOrDuplicateFiles(FileSystem fs, Path path, boolean if (path == null) { return null; } - FileStatus[] stats = HiveStatsUtils.getFileStatusRecurse(path, + List statusList = HiveStatsUtils.getFileStatusRecurse(path, ((dpCtx == null) ? 1 : dpCtx.getNumDPCols()), fs); + FileStatus[] stats = statusList.toArray(new FileStatus[statusList.size()]); return removeTempOrDuplicateFiles(fs, stats, dpCtx, conf, hconf, isBaseDir); } @@ -2675,9 +2677,9 @@ public boolean skipProcessing(Task task) { Path loadPath = dpCtx.getRootPath(); FileSystem fs = loadPath.getFileSystem(conf); int numDPCols = dpCtx.getNumDPCols(); - FileStatus[] status = HiveStatsUtils.getFileStatusRecurse(loadPath, numDPCols, fs); + List status = HiveStatsUtils.getFileStatusRecurse(loadPath, numDPCols, fs); - if (status.length == 0) { + if (status.isEmpty()) { LOG.warn("No partition is generated by dynamic partitioning"); return null; } @@ -2690,9 +2692,9 @@ public boolean skipProcessing(Task task) { // for each dynamically created DP directory, construct a full partition spec // and load the partition based on that - for (int i = 0; i < status.length; ++i) { + for (int i = 0; i < status.size(); ++i) { // get the dynamically created directory - Path partPath = status[i].getPath(); + Path partPath = status.get(i).getPath(); assert fs.getFileStatus(partPath).isDir() : "partitions " + partPath + " is not a directory !"; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java index 0a82225d4a..eb9b1e5ff6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java @@ -240,6 +240,8 @@ private void addPartition(boolean hasMorePartitions, AddPartitionDesc addPartiti */ private Task movePartitionTask(Table table, AddPartitionDesc.OnePartitionDesc partSpec, Path tmpPath) { + // Note: this sets LoadFileType incorrectly for ACID; is that relevant for load? + // See setLoadFileType and setIsAcidIow calls elsewhere for an example. LoadTableDesc loadTableWork = new LoadTableDesc( tmpPath, Utilities.getTableDesc(table), partSpec.getPartSpec(), event.replicationSpec().isReplace() ? 
LoadFileType.REPLACE_ALL : LoadFileType.OVERWRITE_EXISTING, diff --git ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index ced84b3e15..1828f0a531 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.hive.common.HiveStatsUtils; import org.apache.hadoop.hive.common.ValidTxnWriteIdList; import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; @@ -34,6 +35,7 @@ import org.apache.hadoop.hive.metastore.api.DataOperationType; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.orc.OrcFile; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcRecordUpdater; @@ -45,6 +47,7 @@ import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatusWithId; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.mapred.JobConf; import org.apache.hive.common.util.Ref; import org.apache.orc.FileFormatException; import org.apache.orc.impl.OrcAcidUtils; @@ -1034,6 +1037,7 @@ else if (prev != null && next.maxWriteId == prev.maxWriteId return o1.getFileStatus().compareTo(o2.getFileStatus()); }); + // Note: isRawFormat is invalid for non-ORC tables. It will always return true, so we're good. final boolean isBaseInRawFormat = base != null && MetaDataFile.isRawFormat(base, fs); return new Directory() { @@ -1098,7 +1102,13 @@ private static void getChildState(FileStatus child, HdfsFileStatusWithId childWi FileSystem fs) throws IOException { Path p = child.getPath(); String fn = p.getName(); - if (fn.startsWith(BASE_PREFIX) && child.isDir()) { + if (!child.isDirectory()) { + if (!ignoreEmptyFiles || child.getLen() != 0) { + original.add(createOriginalObj(childWithId, child)); + } + return; + } + if (fn.startsWith(BASE_PREFIX)) { long writeId = parseBase(p); if(bestBase.oldestBaseWriteId > writeId) { //keep track for error reporting @@ -1119,28 +1129,25 @@ private static void getChildState(FileStatus child, HdfsFileStatusWithId childWi } else { obsolete.add(child); } - } else if ((fn.startsWith(DELTA_PREFIX) || fn.startsWith(DELETE_DELTA_PREFIX)) - && child.isDir()) { - String deltaPrefix = - (fn.startsWith(DELTA_PREFIX)) ? DELTA_PREFIX : DELETE_DELTA_PREFIX; + } else if (fn.startsWith(DELTA_PREFIX) || fn.startsWith(DELETE_DELTA_PREFIX)) { + String deltaPrefix = fn.startsWith(DELTA_PREFIX) ? DELTA_PREFIX : DELETE_DELTA_PREFIX; ParsedDelta delta = parseDelta(child, deltaPrefix, fs); - if (tblproperties != null && AcidUtils.isInsertOnlyTable(tblproperties) && - ValidWriteIdList.RangeResponse.ALL == writeIdList.isWriteIdRangeAborted(delta.minWriteId, delta.maxWriteId)) { + // Handle aborted deltas. Currently this can only happen for MM tables. 
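// Illustrative sketch, not part of the patch, of the aborted-delta test used below: a
// delta directory is classified as aborted only when its entire write-id range is
// aborted in the ValidWriteIdList.
static boolean isWhollyAborted(ValidWriteIdList writeIdList, ParsedDelta delta) {
  return ValidWriteIdList.RangeResponse.ALL
      == writeIdList.isWriteIdRangeAborted(delta.minWriteId, delta.maxWriteId);
}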
+ if (tblproperties != null && isTransactionalTable(tblproperties) && + ValidWriteIdList.RangeResponse.ALL == writeIdList.isWriteIdRangeAborted( + delta.minWriteId, delta.maxWriteId)) { aborted.add(child); } - if (writeIdList.isWriteIdRangeValid(delta.minWriteId, - delta.maxWriteId) != - ValidWriteIdList.RangeResponse.NONE) { + if (writeIdList.isWriteIdRangeValid( + delta.minWriteId, delta.maxWriteId) != ValidWriteIdList.RangeResponse.NONE) { working.add(delta); } - } else if (child.isDir()) { + } else { // This is just the directory. We need to recurse and find the actual files. But don't // do this until we have determined there is no base. This saves time. Plus, // it is possible that the cleaner is running and removing these original files, // in which case recursing through them could cause us to get an error. originalDirectories.add(child); - } else if (!ignoreEmptyFiles || child.getLen() != 0){ - original.add(createOriginalObj(childWithId, child)); } } @@ -1252,9 +1259,13 @@ public static boolean isTransactionalTable(CreateTableDesc table) { if (table == null || table.getTblProps() == null) { return false; } - String tableIsTransactional = table.getTblProps().get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL); + return isTransactionalTable(table.getTblProps()); + } + + public static boolean isTransactionalTable(Map props) { + String tableIsTransactional = props.get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL); if (tableIsTransactional == null) { - tableIsTransactional = table.getTblProps().get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL.toUpperCase()); + tableIsTransactional = props.get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL.toUpperCase()); } return tableIsTransactional != null && tableIsTransactional.equalsIgnoreCase("true"); } @@ -1744,4 +1755,40 @@ public static int getAcidVersionFromMetaFile(Path deltaOrBaseDir, FileSystem fs) } } } + + public static List getAcidFilesForStats( + Table table, Path dir, Configuration jc, FileSystem fs) throws IOException { + List fileList = new ArrayList<>(); + ValidWriteIdList idList = AcidUtils.getTableValidWriteIdList(jc, + AcidUtils.getFullTableName(table.getDbName(), table.getTableName())); + if (idList == null) { + LOG.warn("Cannot get ACID state for " + table.getDbName() + "." + table.getTableName() + + " from " + jc.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY)); + return null; + } + Directory acidInfo = AcidUtils.getAcidState(dir, jc, idList); + // Assume that for an MM table, or if there's only the base directory, we are good. 
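// Illustrative sketch of the case-insensitive lookup behind the new
// isTransactionalTable(Map) overload above (assuming TABLE_IS_TRANSACTIONAL resolves
// to "transactional"):
static boolean isTransactional(Map<String, String> props) {
  String v = props.get("transactional");
  if (v == null) {
    v = props.get("TRANSACTIONAL"); // legacy upper-case spelling
  }
  return "true".equalsIgnoreCase(v);
}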
+ if (!acidInfo.getCurrentDirectories().isEmpty() && AcidUtils.isFullAcidTable(table)) { + Utilities.FILE_OP_LOGGER.warn( "Computing stats for an ACID table; stats may be inaccurate"); + } + if (fs == null) { + fs = dir.getFileSystem(jc); + } + for (HdfsFileStatusWithId hfs : acidInfo.getOriginalFiles()) { + fileList.add(hfs.getFileStatus()); + } + for (ParsedDelta delta : acidInfo.getCurrentDirectories()) { + for (FileStatus f : HiveStatsUtils.getFileStatusRecurse(delta.getPath(), -1, fs)) { + fileList.add(f); + } + } + if (acidInfo.getBaseDirectory() != null) { + for (FileStatus f : HiveStatsUtils.getFileStatusRecurse( + acidInfo.getBaseDirectory(), -1, fs)) { + fileList.add(f); + } + } + return fileList; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java index 1a63d3f971..07abd378c5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java @@ -161,15 +161,14 @@ public void resolveConcatenateMerge(HiveConf conf) { Path dirPath = inputPaths.get(0); try { FileSystem inpFs = dirPath.getFileSystem(conf); - FileStatus[] status = - HiveStatsUtils.getFileStatusRecurse(dirPath, listBucketingCtx - .getSkewedColNames().size(), inpFs); + List status = HiveStatsUtils.getFileStatusRecurse( + dirPath, listBucketingCtx.getSkewedColNames().size(), inpFs); List newInputPath = new ArrayList(); boolean succeed = true; - for (int i = 0; i < status.length; ++i) { - if (status[i].isDir()) { + for (FileStatus s : status) { + if (s.isDir()) { // Add the lb path to the list of input paths - newInputPath.add(status[i].getPath()); + newInputPath.add(s.getPath()); } else { // find file instead of dir. dont change inputpath succeed = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index c0be51e0b2..6b635fc99d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -1575,10 +1575,9 @@ public Partition loadPartition(Path loadPath, Table tbl, Map par Path newPartPath = null; if (inheritTableSpecs) { - Path partPath = new Path(tbl.getDataLocation(), - Warehouse.makePartPath(partSpec)); - newPartPath = new Path(tblDataLocationPath.toUri().getScheme(), tblDataLocationPath.toUri().getAuthority(), - partPath.toUri().getPath()); + Path partPath = new Path(tbl.getDataLocation(), Warehouse.makePartPath(partSpec)); + newPartPath = new Path(tblDataLocationPath.toUri().getScheme(), + tblDataLocationPath.toUri().getAuthority(), partPath.toUri().getPath()); if(oldPart != null) { /* @@ -1606,6 +1605,12 @@ public Partition loadPartition(Path loadPath, Table tbl, Map par if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() && (null != oldPart)) { newFiles = Collections.synchronizedList(new ArrayList()); } + + + // Note: the stats for ACID tables do not have any coordination with either Hive ACID logic + // like txn commits, timeouts, etc., or the lower-level sync in the metastore pertaining + // to ACID updates. So they are not themselves ACID. + // Note: this assumes both paths are qualified, which they are currently. if (isMmTableWrite && loadPath.equals(newPartPath)) { // MM insert query, move itself is a no-op. 
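// Illustrative usage sketch for getAcidFilesForStats() above ("partPath" and "params"
// are stand-ins for the caller's variables): a null return means no write-id list was
// found in the config, and the caller should clear quick stats rather than publish
// stale ones.
List<FileStatus> files = AcidUtils.getAcidFilesForStats(table, partPath, conf, null);
if (files != null) {
  MetaStoreUtils.populateQuickStats(files, params);
} else {
  MetaStoreUtils.clearQuickStats(params); // the getter already logged a warning
}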
@@ -1626,7 +1631,9 @@ public Partition loadPartition(Path loadPath, Table tbl, Map par Path destPath = newPartPath; if (isMmTableWrite) { // We will load into MM directory, and delete from the parent if needed. + // TODO: this looks invalid after ACID integration. What about base dirs? destPath = new Path(destPath, AcidUtils.deltaSubdir(writeId, writeId, stmtId)); + // TODO: loadFileType for MM table will no longer be REPLACE_ALL filter = (loadFileType == LoadFileType.REPLACE_ALL) ? new JavaUtils.IdPathFilter(writeId, stmtId, false, true) : filter; } @@ -1641,6 +1648,7 @@ else if(!isAcidIUDoperation && isFullAcidTable) { //for fullAcid tables we don't delete files for commands with OVERWRITE - we create a new // base_x. (there is Insert Overwrite and Load Data Overwrite) boolean isAutoPurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge")); + // TODO: this should never run for MM tables anymore. Remove the flag, and maybe the filter? replaceFiles(tbl.getPath(), loadPath, destPath, oldPartPath, getConf(), isSrcLocal, isAutoPurge, newFiles, filter, isMmTableWrite, !tbl.isTemporary()); } else { @@ -1689,7 +1697,21 @@ else if(!isAcidIUDoperation && isFullAcidTable) { StatsSetupConst.setStatsStateForCreateTable(newTPart.getParameters(), MetaStoreUtils.getColumnNames(tbl.getCols()), StatsSetupConst.TRUE); } - MetaStoreUtils.populateQuickStats(HiveStatsUtils.getFileStatusRecurse(newPartPath, -1, newPartPath.getFileSystem(conf)), newTPart.getParameters()); + // Note: we are creating a brand-new partition, so this is going to be valid for ACID. + List filesForStats = null; + if (isFullAcidTable || isMmTableWrite) { + filesForStats = AcidUtils.getAcidFilesForStats( + newTPart.getTable(), newPartPath, conf, null); + } else { + filesForStats = HiveStatsUtils.getFileStatusRecurse( + newPartPath, -1, newPartPath.getFileSystem(conf)); + } + if (filesForStats != null) { + MetaStoreUtils.populateQuickStats(filesForStats, newTPart.getParameters()); + } else { + // The ACID state is probably absent. Warning is logged in the get method. + MetaStoreUtils.clearQuickStats(newTPart.getParameters()); + } try { LOG.debug("Adding new partition " + newTPart.getSpec()); getSynchronizedMSC().add_partition(newTPart.getTPartition()); @@ -1946,7 +1968,7 @@ private void constructOneLBLocationMap(FileStatus fSta, try { FileSystem fs = loadPath.getFileSystem(conf); if (!isMmTable) { - FileStatus[] leafStatus = HiveStatsUtils.getFileStatusRecurse(loadPath, numDP, fs); + List leafStatus = HiveStatsUtils.getFileStatusRecurse(loadPath, numDP, fs); // Check for empty partitions for (FileStatus s : leafStatus) { if (!s.isDirectory()) { @@ -2168,9 +2190,13 @@ public void loadTable(Path loadPath, String tableName, LoadFileType loadFileType if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary()) { newFiles = Collections.synchronizedList(new ArrayList()); } + // Note: this assumes both paths are qualified, which they are currently. if (isMmTable && loadPath.equals(tbl.getPath())) { - Utilities.FILE_OP_LOGGER.debug("not moving " + loadPath + " to " + tbl.getPath()); + if (Utilities.FILE_OP_LOGGER.isDebugEnabled()) { + Utilities.FILE_OP_LOGGER.debug( + "not moving " + loadPath + " to " + tbl.getPath() + " (MM)"); + } newFiles = listFilesCreatedByQuery(loadPath, writeId, stmtId); } else { // Either a non-MM query, or a load into MM table from an external source. 
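// Illustrative sketch of the MM destination rule used above ("root", "writeId" and
// "stmtId" stand in for the patch's variables): an MM write goes to a single-write-id,
// statement-level delta directory under the partition or table root, e.g. roughly
// <root>/delta_0000005_0000005_0001 for writeId 5, stmtId 1.
Path destPath = new Path(root, AcidUtils.deltaSubdir(writeId, writeId, stmtId));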
@@ -2180,7 +2206,9 @@ public void loadTable(Path loadPath, String tableName, LoadFileType loadFileType if (isMmTable) { assert !isAcidIUDoperation; // We will load into MM directory, and delete from the parent if needed. + // TODO: this looks invalid after ACID integration. What about base dirs? destPath = new Path(destPath, AcidUtils.deltaSubdir(writeId, writeId, stmtId)); + // TODO: loadFileType for MM table will no longer be REPLACE_ALL filter = loadFileType == LoadFileType.REPLACE_ALL ? new JavaUtils.IdPathFilter(writeId, stmtId, false, true) : filter; } @@ -2193,6 +2221,7 @@ else if(!isAcidIUDoperation && isFullAcidTable) { //for fullAcid we don't want to delete any files even for OVERWRITE see HIVE-14988/HIVE-17361 //todo: should probably do the same for MM IOW boolean isAutopurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge")); + // TODO: this should never run for MM tables anymore. Remove the flag, and maybe the filter? replaceFiles(tblPath, loadPath, destPath, tblPath, sessionConf, isSrcLocal, isAutopurge, newFiles, filter, isMmTable?true:false, !tbl.isTemporary()); } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index 1c6b793e11..b7fbea4473 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -520,6 +520,8 @@ private static boolean isAcid(Long writeId) { Task addPartTask = TaskFactory.get(new DDLWork(x.getInputs(), x.getOutputs(), addPartitionDesc), x.getConf()); + // Note: this sets LoadFileType incorrectly for ACID; is that relevant for import? + // See setLoadFileType and setIsAcidIow calls elsewhere for an example. LoadTableDesc loadTableWork = new LoadTableDesc(moveTaskSrc, Utilities.getTableDesc(table), partSpec.getPartSpec(), replicationSpec.isReplace() ? LoadFileType.REPLACE_ALL : LoadFileType.OVERWRITE_EXISTING, diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index 7d2de75315..fb3bfdacb1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -329,8 +329,9 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { stmtId = SessionState.get().getTxnMgr().getStmtIdAndIncrement(); } - LoadTableDesc loadTableWork; - loadTableWork = new LoadTableDesc(new Path(fromURI), + // Note: this sets LoadFileType incorrectly for ACID; is that relevant for load? + // See setLoadFileType and setIsAcidIow calls elsewhere for an example. + LoadTableDesc loadTableWork = new LoadTableDesc(new Path(fromURI), Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite ? LoadFileType.REPLACE_ALL : LoadFileType.KEEP_EXISTING, writeId); loadTableWork.setStmtId(stmtId); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index d99df80b3e..d1609e1186 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -6953,10 +6953,12 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) ltd = new LoadTableDesc(queryTmpdir, table_desc, dpCtx, acidOp, isReplace, writeId); // For Acid table, Insert Overwrite shouldn't replace the table content. 
We keep the old // deltas and base and leave them up to the cleaner to clean up - LoadFileType loadType = (!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), - dest_tab.getTableName()) && !destTableIsTransactional) + boolean isInsertInto = qb.getParseInfo().isInsertIntoTable( + dest_tab.getDbName(), dest_tab.getTableName()); + LoadFileType loadType = (!isInsertInto && !destTableIsTransactional) ? LoadFileType.REPLACE_ALL : LoadFileType.KEEP_EXISTING; ltd.setLoadFileType(loadType); + ltd.setInsertOverwrite(!isInsertInto); ltd.setLbCtx(lbCtx); loadTableWork.add(ltd); } else { @@ -7042,10 +7044,12 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) ltd = new LoadTableDesc(queryTmpdir, table_desc, dest_part.getSpec(), acidOp, writeId); // For Acid table, Insert Overwrite shouldn't replace the table content. We keep the old // deltas and base and leave them up to the cleaner to clean up - LoadFileType loadType = (!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), - dest_tab.getTableName()) && !destTableIsTransactional) // // Both Full-acid and MM tables are excluded. + boolean isInsertInto = qb.getParseInfo().isInsertIntoTable( + dest_tab.getDbName(), dest_tab.getTableName()); + LoadFileType loadType = (!isInsertInto && !destTableIsTransactional) ? LoadFileType.REPLACE_ALL : LoadFileType.KEEP_EXISTING; ltd.setLoadFileType(loadType); + ltd.setInsertOverwrite(!isInsertInto); ltd.setLbCtx(lbCtx); loadTableWork.add(ltd); @@ -7055,7 +7059,7 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES .getMsg(dest_tab.getTableName() + "@" + dest_part.getName())); } - break; + break; } case QBMetaData.DEST_LOCAL_FILE: isLocal = true; @@ -12323,7 +12327,6 @@ public void validate() throws SemanticException { for (WriteEntity writeEntity : getOutputs()) { WriteEntity.Type type = writeEntity.getType(); - if (type == WriteEntity.Type.PARTITION || type == WriteEntity.Type.DUMMYPARTITION) { String conflictingArchive = null; try { @@ -12697,8 +12700,6 @@ ASTNode analyzeCreateTable( } } - addDbAndTabToOutputs(qualifiedTabName, TableType.MANAGED_TABLE); - if (isTemporary) { if (partCols.size() > 0) { throw new SemanticException("Partition columns are not supported on temporary tables"); @@ -12720,11 +12721,14 @@ ASTNode analyzeCreateTable( } // Handle different types of CREATE TABLE command + // Note: each branch must call addDbAndTabToOutputs after finalizing table properties. 
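// Illustrative restatement of the ordering contract stated in the comment above (names
// are the patch's; this is not additional patch content): table properties must be
// finalized before the entity is registered, since the WriteEntity now carries tblProps.
tblProps = addDefaultProperties(tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization);
addDbAndTabToOutputs(qualifiedTabName, TableType.MANAGED_TABLE, tblProps); // after, never before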
+ switch (command_type) { case CREATE_TABLE: // REGULAR CREATE TABLE DDL tblProps = addDefaultProperties( tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization); + addDbAndTabToOutputs(qualifiedTabName, TableType.MANAGED_TABLE, tblProps); CreateTableDesc crtTblDesc = new CreateTableDesc(dbDotTab, isExt, isTemporary, cols, partCols, bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, @@ -12747,6 +12751,7 @@ ASTNode analyzeCreateTable( case CTLT: // create table like tblProps = addDefaultProperties( tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization); + addDbAndTabToOutputs(qualifiedTabName, TableType.MANAGED_TABLE, tblProps); if (isTemporary) { Table likeTable = getTable(likeTableName, false); @@ -12825,6 +12830,7 @@ ASTNode analyzeCreateTable( tblProps = addDefaultProperties( tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization); + addDbAndTabToOutputs(qualifiedTabName, TableType.MANAGED_TABLE, tblProps); tableDesc = new CreateTableDesc(qualifiedTabName[0], dbDotTab, isExt, isTemporary, cols, partCols, bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, rowFormatParams.fieldEscape, rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, @@ -12846,11 +12852,14 @@ ASTNode analyzeCreateTable( return null; } - private void addDbAndTabToOutputs(String[] qualifiedTabName, TableType type) throws SemanticException { + /** Adds entities for create table/create view. */ + private void addDbAndTabToOutputs(String[] qualifiedTabName, TableType type, + Map tblProps) throws SemanticException { Database database = getDatabase(qualifiedTabName[0]); outputs.add(new WriteEntity(database, WriteEntity.WriteType.DDL_SHARED)); Table t = new Table(qualifiedTabName[0], qualifiedTabName[1]); + t.setParameters(tblProps); t.setTableType(type); outputs.add(new WriteEntity(t, WriteEntity.WriteType.DDL_NO_LOCK)); } @@ -12952,7 +12961,7 @@ protected ASTNode analyzeCreateView(ASTNode ast, QB qb, PlannerContext plannerCt storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location, storageFormat.getSerde(), storageFormat.getStorageHandler(), storageFormat.getSerdeProps()); - addDbAndTabToOutputs(qualTabName, TableType.MATERIALIZED_VIEW); + addDbAndTabToOutputs(qualTabName, TableType.MATERIALIZED_VIEW, tblProps); queryState.setCommandType(HiveOperation.CREATE_MATERIALIZED_VIEW); } else { createVwDesc = new CreateViewDesc( @@ -12961,7 +12970,7 @@ protected ASTNode analyzeCreateView(ASTNode ast, QB qb, PlannerContext plannerCt storageFormat.getOutputFormat(), storageFormat.getSerde()); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), createVwDesc), conf)); - addDbAndTabToOutputs(qualTabName, TableType.VIRTUAL_VIEW); + addDbAndTabToOutputs(qualTabName, TableType.VIRTUAL_VIEW, tblProps); queryState.setCommandType(HiveOperation.CREATEVIEW); } qb.setViewDesc(createVwDesc); @@ -14074,7 +14083,8 @@ protected boolean deleting(String destination) { // Make sure the proper transaction manager that supports ACID is being used protected void checkAcidTxnManager(Table table) throws SemanticException { - if (SessionState.get() != null && !getTxnMgr().supportsAcid()) { + if (SessionState.get() != null && !getTxnMgr().supportsAcid() + && !HiveConf.getBoolVar(conf, ConfVars.HIVE_IN_TEST_REPL)) { throw new SemanticException(ErrorMsg.TXNMGR_NOT_ACID, table.getDbName(), table.getTableName()); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/BasicStatsWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/BasicStatsWork.java index 
a4e770ce95..55d05a1dd9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/BasicStatsWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/BasicStatsWork.java @@ -172,9 +172,11 @@ public boolean isTargetRewritten() { return true; } // INSERT OVERWRITE - if (getLoadTableDesc() != null && getLoadTableDesc().getLoadFileType() == LoadFileType.REPLACE_ALL) { + LoadTableDesc ltd = getLoadTableDesc(); + if (ltd != null && (ltd.getLoadFileType() == LoadFileType.REPLACE_ALL || ltd.isInsertOverwrite())) { return true; } + // CREATE TABLE ... AS if (getLoadFileDesc() != null && getLoadFileDesc().getCtasCreateTableDesc() != null) { return true; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java index 8ce0cb05b6..80f77b9f0c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java @@ -231,7 +231,8 @@ private void generateActualTasks(HiveConf conf, List statusList = HiveStatsUtils.getFileStatusRecurse(dirPath, dpLbLevel, inpFs); + FileStatus[] status = statusList.toArray(new FileStatus[statusList.size()]); // cleanup pathToPartitionInfo Map ptpi = work.getPathToPartitionInfo(); diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java index 946c300750..d4d46a3671 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java @@ -36,6 +36,8 @@ import org.apache.hadoop.hive.metastore.api.InvalidOperationException; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.StatsTask; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -161,11 +163,18 @@ public void run() { long rawDataSize = 0; long fileSize = 0; long numFiles = 0; - LOG.debug("Aggregating stats for {}", dir); - FileStatus[] fileList = HiveStatsUtils.getFileStatusRecurse(dir, -1, fs); + // Note: this code would be invalid for transactional tables of any kind. 
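// Illustrative distillation of the rewrite test in BasicStatsWork above: under ACID,
// INSERT OVERWRITE keeps old deltas (so LoadFileType stays KEEP_EXISTING), and the new
// isInsertOverwrite() flag must also count as a full rewrite for stats purposes.
static boolean isTargetRewritten(LoadTableDesc ltd) {
  return ltd != null
      && (ltd.getLoadFileType() == LoadFileType.REPLACE_ALL || ltd.isInsertOverwrite());
}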
+ Utilities.FILE_OP_LOGGER.debug("Aggregating stats for {}", dir); + List fileList = null; + if (partish.getTable() != null + && AcidUtils.isTransactionalTable(partish.getTable())) { + fileList = AcidUtils.getAcidFilesForStats(partish.getTable(), dir, jc, fs); + } else { + fileList = HiveStatsUtils.getFileStatusRecurse(dir, -1, fs); + } for (FileStatus file : fileList) { - LOG.debug("Computing stats for {}", file); + Utilities.FILE_OP_LOGGER.debug("Computing stats for {}", file); if (!file.isDirectory()) { InputFormat inputFormat = ReflectionUtil.newInstance(partish.getInputFormatClass(), jc); InputSplit dummySplit = new FileSplit(file.getPath(), 0, 0, new String[] { partish.getLocation() }); diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java index 1d7660e8b2..8c23887176 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.stats; +import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; @@ -42,6 +43,7 @@ import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; @@ -111,7 +113,8 @@ public String getName() { private static class BasicStatsProcessor { private Partish partish; - private FileStatus[] partfileStatus; + private List partfileStatus; + private boolean isMissingAcidState = false; private BasicStatsWork work; private boolean followedColStats1; @@ -124,11 +127,10 @@ public BasicStatsProcessor(Partish partish, BasicStatsWork work, HiveConf conf, public Object process(StatsAggregator statsAggregator) throws HiveException, MetaException { Partish p = partish; Map parameters = p.getPartParameters(); - if (p.isAcid()) { + if (p.isTransactionalTable()) { + // TODO: this should also happen on any error. Right now this task will just fail. StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE); - } - - if (work.isTargetRewritten()) { + } else if (work.isTargetRewritten()) { StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE); } @@ -140,8 +142,15 @@ public Object process(StatsAggregator statsAggregator) throws HiveException, Met StatsSetupConst.clearColumnStatsState(parameters); } - if(partfileStatus == null){ - LOG.warn("Partition/partfiles is null for: " + partish.getPartition().getSpec()); + if (partfileStatus == null) { + // This may happen if ACID state is absent from config. + String spec = partish.getPartition() == null ? 
partish.getTable().getTableName() + : partish.getPartition().getSpec().toString(); + LOG.warn("Partition/partfiles is null for: " + spec); + if (isMissingAcidState) { + MetaStoreUtils.clearQuickStats(parameters); + return p.getOutput(); + } return null; } @@ -153,23 +162,28 @@ public Object process(StatsAggregator statsAggregator) throws HiveException, Met StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE); } - updateQuickStats(parameters, partfileStatus); - if (StatsSetupConst.areBasicStatsUptoDate(parameters)) { - if (statsAggregator != null) { + MetaStoreUtils.populateQuickStats(partfileStatus, parameters); + + if (statsAggregator != null) { + // Update stats for transactional tables (MM, or full ACID with overwrite), even + // though we are marking stats as not being accurate. + if (StatsSetupConst.areBasicStatsUptoDate(parameters) || p.isTransactionalTable()) { String prefix = getAggregationPrefix(p.getTable(), p.getPartition()); - updateStats(statsAggregator, parameters, prefix); + updateStats(statsAggregator, parameters, prefix, p.isAcid()); } } return p.getOutput(); } - public void collectFileStatus(Warehouse wh) throws MetaException { - partfileStatus = wh.getFileStatusesForSD(partish.getPartSd()); - } - - private void updateQuickStats(Map parameters, FileStatus[] partfileStatus) throws MetaException { - MetaStoreUtils.populateQuickStats(partfileStatus, parameters); + public void collectFileStatus(Warehouse wh, HiveConf conf) throws MetaException, IOException { + if (!partish.isTransactionalTable()) { + partfileStatus = wh.getFileStatusesForSD(partish.getPartSd()); + } else { + Path path = new Path(partish.getPartSd().getLocation()); + partfileStatus = AcidUtils.getAcidFilesForStats(partish.getTable(), path, conf, null); + isMissingAcidState = true; + } } private String getAggregationPrefix(Table table, Partition partition) throws MetaException { @@ -191,9 +205,15 @@ private String getAggregationPrefix0(Table table, Partition partition) throws Me return prefix; } - private void updateStats(StatsAggregator statsAggregator, Map parameters, String aggKey) throws HiveException { - + private void updateStats(StatsAggregator statsAggregator, Map parameters, + String aggKey, boolean isFullAcid) throws HiveException { for (String statType : StatsSetupConst.statsRequireCompute) { + if (isFullAcid && !work.isTargetRewritten()) { + // Don't bother with aggregation in this case, it will probably be invalid. 
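// Illustrative sketch of the file-status collection split in collectFileStatus() above
// (field names from the patch): non-transactional tables keep the Warehouse listing;
// transactional ones use the ACID-aware listing, which may legitimately return null
// when no write-id list is present in the config.
if (!partish.isTransactionalTable()) {
  partfileStatus = wh.getFileStatusesForSD(partish.getPartSd());
} else {
  Path path = new Path(partish.getPartSd().getLocation());
  partfileStatus = AcidUtils.getAcidFilesForStats(partish.getTable(), path, conf, null);
  isMissingAcidState = true; // a null listing now means "ACID state absent", not an error
}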
+ parameters.remove(statType); + continue; + } + String value = statsAggregator.aggregateStats(aggKey, statType); if (value != null && !value.isEmpty()) { long longValue = Long.parseLong(value); @@ -247,7 +267,7 @@ private int aggregateStats(Hive db) { partishes.add(p = new Partish.PTable(table)); BasicStatsProcessor basicStatsProcessor = new BasicStatsProcessor(p, work, conf, followedColStats); - basicStatsProcessor.collectFileStatus(wh); + basicStatsProcessor.collectFileStatus(wh, conf); Table res = (Table) basicStatsProcessor.process(statsAggregator); if (res == null) { return 0; @@ -280,7 +300,7 @@ private int aggregateStats(Hive db) { futures.add(pool.submit(new Callable() { @Override public Void call() throws Exception { - bsp.collectFileStatus(wh); + bsp.collectFileStatus(wh, conf); return null; } })); diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java index 7591c0681b..d4cfd0ad62 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java @@ -184,4 +184,4 @@ public int persistColumnStats(Hive db, Table tbl) throws HiveException, MetaExce public void setDpPartSpecs(Collection dpPartSpecs) { } -} \ No newline at end of file +} diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/Partish.java ql/src/java/org/apache/hadoop/hive/ql/stats/Partish.java index 05b0474e90..47810e2c34 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/Partish.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/Partish.java @@ -51,6 +51,10 @@ public final boolean isAcid() { return AcidUtils.isFullAcidTable(getTable()); } + public final boolean isTransactionalTable() { + return AcidUtils.isTransactionalTable(getTable()); + } + public abstract Table getTable(); public abstract Map getPartParameters(); diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsAggregator.java ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsAggregator.java index d84cf136d5..6d2de0a3ae 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsAggregator.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsAggregator.java @@ -50,9 +50,7 @@ public boolean connect(StatsCollectionContext scc) { List statsDirs = scc.getStatsTmpDirs(); assert statsDirs.size() == 1 : "Found multiple stats dirs: " + statsDirs; Path statsDir = new Path(statsDirs.get(0)); - if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { - Utilities.FILE_OP_LOGGER.trace("About to read stats from : " + statsDir); - } + Utilities.FILE_OP_LOGGER.trace("About to read stats from {}", statsDir); statsMap = new HashMap>(); try { @@ -65,6 +63,7 @@ public boolean accept(Path file) { } }); for (FileStatus file : status) { + Utilities.FILE_OP_LOGGER.trace("About to read stats file {} ", file.getPath()); Input in = new Input(fs.open(file.getPath())); Kryo kryo = SerializationUtilities.borrowKryo(); try { @@ -72,6 +71,7 @@ public boolean accept(Path file) { } finally { SerializationUtilities.releaseKryo(kryo); } + Utilities.FILE_OP_LOGGER.trace("Read : {}", statsMap); statsList.add(statsMap); in.close(); } @@ -86,7 +86,7 @@ public boolean accept(Path file) { @Override public String aggregateStats(String partID, String statType) { long counter = 0; - LOG.debug("Part ID: " + partID + "\t" + statType); + Utilities.FILE_OP_LOGGER.debug("Part ID: " + partID + "\t" + statType); for (Map> statsMap : statsList) { Map partStat = statsMap.get(partID); if (null == partStat) { // not 
all partitions are scanned in all mappers, so this could be null. @@ -98,7 +98,7 @@ public String aggregateStats(String partID, String statType) { } counter += Long.parseLong(statVal); } - LOG.info("Read stats for : " + partID + "\t" + statType + "\t" + counter); + Utilities.FILE_OP_LOGGER.info("Read stats for : " + partID + "\t" + statType + "\t" + counter); return String.valueOf(counter); } diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java index 922cfc23c0..902b37f787 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java @@ -105,9 +105,7 @@ public boolean closeConnection(StatsCollectionContext context) { statsFile = new Path(statsDir, StatsSetupConst.STATS_FILE_PREFIX + conf.getInt("mapred.task.partition", 0)); } - if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { - Utilities.FILE_OP_LOGGER.trace("About to create stats file for this task : " + statsFile); - } + Utilities.FILE_OP_LOGGER.trace("About to create stats file for this task : {}", statsFile); Output output = new Output(statsFile.getFileSystem(conf).create(statsFile,true)); LOG.debug("Created file : " + statsFile); LOG.debug("Writing stats in it : " + statsMap); diff --git ql/src/test/queries/clientnegative/orc_change_fileformat_acid.q ql/src/test/queries/clientnegative/orc_change_fileformat_acid.q index cc73616a32..e9a1a419e5 100644 --- ql/src/test/queries/clientnegative/orc_change_fileformat_acid.q +++ ql/src/test/queries/clientnegative/orc_change_fileformat_acid.q @@ -1,3 +1,6 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + SET hive.exec.schema.evolution=false; create table src_orc (key tinyint, val string) clustered by (val) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); alter table src_orc set fileformat textfile; diff --git ql/src/test/queries/clientnegative/orc_change_serde_acid.q ql/src/test/queries/clientnegative/orc_change_serde_acid.q index 91a2be50c0..d5f208cac9 100644 --- ql/src/test/queries/clientnegative/orc_change_serde_acid.q +++ ql/src/test/queries/clientnegative/orc_change_serde_acid.q @@ -1,3 +1,6 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + SET hive.exec.schema.evolution=false; create table src_orc (key tinyint, val string) clustered by (val) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); alter table src_orc set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'; diff --git ql/src/test/queries/clientnegative/orc_reorder_columns1_acid.q ql/src/test/queries/clientnegative/orc_reorder_columns1_acid.q index 234e74bb74..0169784b90 100644 --- ql/src/test/queries/clientnegative/orc_reorder_columns1_acid.q +++ ql/src/test/queries/clientnegative/orc_reorder_columns1_acid.q @@ -1,3 +1,6 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + SET hive.exec.schema.evolution=false; create table src_orc (key tinyint, val string) clustered by (val) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); alter table src_orc change key k tinyint first; diff --git ql/src/test/queries/clientnegative/orc_reorder_columns2_acid.q ql/src/test/queries/clientnegative/orc_reorder_columns2_acid.q index 57ab049c6d..d42752ebaf 100644 --- 
ql/src/test/queries/clientnegative/orc_reorder_columns2_acid.q +++ ql/src/test/queries/clientnegative/orc_reorder_columns2_acid.q @@ -1,3 +1,6 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + SET hive.exec.schema.evolution=false; create table src_orc (key tinyint, val string) clustered by (val) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); alter table src_orc change key k tinyint after val; diff --git ql/src/test/queries/clientnegative/orc_replace_columns1_acid.q ql/src/test/queries/clientnegative/orc_replace_columns1_acid.q index 9fe9209d03..91367fd74d 100644 --- ql/src/test/queries/clientnegative/orc_replace_columns1_acid.q +++ ql/src/test/queries/clientnegative/orc_replace_columns1_acid.q @@ -1,3 +1,6 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + SET hive.exec.schema.evolution=false; create table src_orc (key tinyint, val string) clustered by (val) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); alter table src_orc replace columns (k int); diff --git ql/src/test/queries/clientnegative/orc_replace_columns2_acid.q ql/src/test/queries/clientnegative/orc_replace_columns2_acid.q index 7b37757ebf..a46c0107d5 100644 --- ql/src/test/queries/clientnegative/orc_replace_columns2_acid.q +++ ql/src/test/queries/clientnegative/orc_replace_columns2_acid.q @@ -1,3 +1,6 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + SET hive.exec.schema.evolution=false; -- Currently, string to int conversion is not supported because it isn't in the lossless diff --git ql/src/test/queries/clientnegative/orc_replace_columns3_acid.q ql/src/test/queries/clientnegative/orc_replace_columns3_acid.q index e3cb819b62..59f7f45fe3 100644 --- ql/src/test/queries/clientnegative/orc_replace_columns3_acid.q +++ ql/src/test/queries/clientnegative/orc_replace_columns3_acid.q @@ -1,3 +1,6 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + SET hive.exec.schema.evolution=false; -- Currently, smallint to tinyint conversion is not supported because it isn't in the lossless diff --git ql/src/test/queries/clientnegative/orc_type_promotion1_acid.q ql/src/test/queries/clientnegative/orc_type_promotion1_acid.q index 3a8c08a829..1eda267caa 100644 --- ql/src/test/queries/clientnegative/orc_type_promotion1_acid.q +++ ql/src/test/queries/clientnegative/orc_type_promotion1_acid.q @@ -1,3 +1,6 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + SET hive.exec.schema.evolution=false; -- Currently, string to int conversion is not supported because it isn't in the lossless diff --git ql/src/test/queries/clientnegative/orc_type_promotion2_acid.q ql/src/test/queries/clientnegative/orc_type_promotion2_acid.q index 1d24b1dd18..b593b0ffe0 100644 --- ql/src/test/queries/clientnegative/orc_type_promotion2_acid.q +++ ql/src/test/queries/clientnegative/orc_type_promotion2_acid.q @@ -1,3 +1,6 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + SET hive.exec.schema.evolution=false; -- Currently, bigint to int conversion is not supported because it isn't in the lossless diff --git ql/src/test/queries/clientnegative/orc_type_promotion3_acid.q ql/src/test/queries/clientnegative/orc_type_promotion3_acid.q index 83764e29cc..94832f6eda 100644 --- 
ql/src/test/queries/clientnegative/orc_type_promotion3_acid.q +++ ql/src/test/queries/clientnegative/orc_type_promotion3_acid.q @@ -1,3 +1,6 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + SET hive.exec.schema.evolution=false; -- Currently, double to smallint conversion is not supported because it isn't in the lossless diff --git ql/src/test/queries/clientpositive/acid_nullscan.q ql/src/test/queries/clientpositive/acid_nullscan.q index d048231584..3c71242ea6 100644 --- ql/src/test/queries/clientpositive/acid_nullscan.q +++ ql/src/test/queries/clientpositive/acid_nullscan.q @@ -10,6 +10,7 @@ CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STO insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10; insert into table acid_vectorized values (1, 'bar'); + explain extended select sum(a) from acid_vectorized where false; diff --git ql/src/test/results/clientpositive/acid_nullscan.q.out ql/src/test/results/clientpositive/acid_nullscan.q.out index 669fa3fa47..d15e2f1bce 100644 --- ql/src/test/results/clientpositive/acid_nullscan.q.out +++ ql/src/test/results/clientpositive/acid_nullscan.q.out @@ -42,12 +42,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_vectorized - Statistics: Num rows: 1 Data size: 25470 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 88 Data size: 25470 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: false (type: boolean) - Statistics: Num rows: 1 Data size: 25470 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 289 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(a) mode: hash @@ -78,8 +78,6 @@ STAGE PLANS: #### A masked pattern was here #### name default.acid_vectorized numFiles 3 - numRows 0 - rawDataSize 0 serialization.ddl struct acid_vectorized { i32 a, string b} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe @@ -101,8 +99,6 @@ STAGE PLANS: #### A masked pattern was here #### name default.acid_vectorized numFiles 3 - numRows 0 - rawDataSize 0 serialization.ddl struct acid_vectorized { i32 a, string b} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde diff --git ql/src/test/results/clientpositive/autoColumnStats_4.q.out ql/src/test/results/clientpositive/autoColumnStats_4.q.out index 1f4c0adfc7..63caa1730b 100644 --- ql/src/test/results/clientpositive/autoColumnStats_4.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_4.q.out @@ -195,8 +195,6 @@ Retention: 0 Table Type: MANAGED_TABLE Table Parameters: numFiles 2 - numRows 0 - rawDataSize 0 totalSize 1862 transactional true transactional_properties default @@ -239,8 +237,6 @@ Table Type: MANAGED_TABLE Table Parameters: COLUMN_STATS_ACCURATE {} numFiles 4 - numRows 0 - rawDataSize 0 totalSize 3012 transactional true transactional_properties default diff --git ql/src/test/results/clientpositive/druid/druidmini_mv.q.out ql/src/test/results/clientpositive/druid/druidmini_mv.q.out index b4c8be06ef..18ca78fc49 100644 --- ql/src/test/results/clientpositive/druid/druidmini_mv.q.out +++ ql/src/test/results/clientpositive/druid/druidmini_mv.q.out @@ -330,33 +330,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cmv_basetable - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 492 
Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (a = 3) (type: boolean) - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: double) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Map 3 Map Operator Tree: TableScan alias: cmv_basetable - Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 656 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((a = 3) and (d = 3)) (type: boolean) - Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: double) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Reducer 2 Reduce Operator Tree: @@ -367,14 +367,14 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 1044 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 138 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 3 (type: int), _col0 (type: double), 3 (type: int), _col1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 36 Data size: 1044 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 138 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 1044 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 138 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -451,34 +451,34 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cmv_basetable - Statistics: Num rows: 6 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 16072 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (a = 3) (type: boolean) - Statistics: Num rows: 6 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1913 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: CAST( t AS timestamp with local time zone) (type: timestamp with local time zone), 3 (type: int), b (type: varchar(256)), c (type: double) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1913 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: timestamp with local time zone), _col1 (type: int), _col2 (type: varchar(256)), _col3 
(type: double), floor_hour(CAST( GenericUDFEpochMilli(_col0) AS TIMESTAMP)) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, __time_granularity - Statistics: Num rows: 6 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1913 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: __time_granularity (type: timestamp) sort order: + Map-reduce partition columns: __time_granularity (type: timestamp) - Statistics: Num rows: 6 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1913 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: timestamp with local time zone), _col1 (type: int), _col2 (type: varchar(256)), _col3 (type: double) Reducer 2 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: timestamp with local time zone), VALUE._col1 (type: int), VALUE._col2 (type: varchar(256)), VALUE._col3 (type: double), KEY.__time_granularity (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, __time_granularity - Statistics: Num rows: 6 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1913 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 6 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1913 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat @@ -537,33 +537,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cmv_basetable - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 492 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (a = 3) (type: boolean) - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: double) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Map 3 Map Operator Tree: TableScan alias: cmv_basetable - Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 656 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((a = 3) and (d = 3)) (type: boolean) - Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: double) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Reducer 2 Reduce Operator Tree: @@ -574,14 +574,14 @@ STAGE PLANS: 
0 1 outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 1044 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 138 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 3 (type: int), _col0 (type: double), 3 (type: int), _col1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 36 Data size: 1044 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 138 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 1044 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 138 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out index 1abd3a29f1..6a5ace0af6 100644 --- ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out +++ ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out @@ -45,22 +45,22 @@ STAGE PLANS: alias: acidtbldefault filterExpr: (a = 1) (type: boolean) buckets included: [1,] of 16 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1765 Data size: 6712 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (a = 1) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 19 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 19 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 directory: hdfs://### HDFS PATH ### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 19 Basic stats: COMPLETE Column stats: NONE Stats Publishing Key Prefix: hdfs://### HDFS PATH ### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -98,8 +98,6 @@ STAGE PLANS: location hdfs://### HDFS PATH ### name default.acidtbldefault numFiles 17 - numRows 0 - rawDataSize 0 serialization.ddl struct acidtbldefault { i32 a} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -122,8 +120,6 @@ STAGE PLANS: location hdfs://### HDFS PATH ### name default.acidtbldefault numFiles 17 - numRows 0 - rawDataSize 0 serialization.ddl struct acidtbldefault { i32 a} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde diff --git ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out index 64e5b17936..05d5d0abb3 100644 --- ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out +++ ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out @@ -665,22 +665,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over10k_orc_bucketed - Statistics: Num rows: 2098 Data size: 622340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1227 Data size: 702030 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: 
struct) outputColumnNames: ROW__ID - Statistics: Num rows: 2098 Data size: 622340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1227 Data size: 702030 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: ROW__ID (type: struct) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1049 Data size: 88116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 613 Data size: 51492 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: _col0 (type: struct) - Statistics: Num rows: 1049 Data size: 88116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 613 Data size: 51492 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: may be used (ACID table) @@ -692,13 +692,13 @@ STAGE PLANS: keys: KEY._col0 (type: struct) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1049 Data size: 88116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 613 Data size: 51492 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 349 Data size: 29316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 204 Data size: 17136 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 349 Data size: 29316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 204 Data size: 17136 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/default_constraint.q.out ql/src/test/results/clientpositive/llap/default_constraint.q.out index 89b1224004..72ff7af046 100644 --- ql/src/test/results/clientpositive/llap/default_constraint.q.out +++ ql/src/test/results/clientpositive/llap/default_constraint.q.out @@ -1490,8 +1490,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - numFiles 1 - totalSize 1063 + COLUMN_STATS_ACCURATE {} transactional true transactional_properties default #### A masked pattern was here #### @@ -1658,9 +1657,8 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {} #### A masked pattern was here #### - numFiles 2 - totalSize 2127 transactional true transactional_properties default #### A masked pattern was here #### @@ -1737,9 +1735,8 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {} #### A masked pattern was here #### - numFiles 2 - totalSize 2127 transactional true transactional_properties default #### A masked pattern was here #### @@ -1913,9 +1910,8 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {} #### A masked pattern was here #### - numFiles 3 - totalSize 3192 transactional true transactional_properties default #### A masked pattern was here #### @@ -1991,9 +1987,8 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {} #### A masked pattern was here #### - numFiles 3 - totalSize 3192 transactional true transactional_properties default #### A masked pattern 
was here #### @@ -2070,9 +2065,8 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {} #### A masked pattern was here #### - numFiles 3 - totalSize 3192 transactional true transactional_properties default #### A masked pattern was here #### @@ -2644,8 +2638,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - numFiles 1 - totalSize 1063 + COLUMN_STATS_ACCURATE {} transactional true transactional_properties default #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out index 6a97736008..97f7cd159f 100644 --- ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out +++ ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out @@ -94,19 +94,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_part - Statistics: Num rows: 1600 Data size: 150327 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158 Data size: 60414 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1911 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ROW__ID (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1911 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1911 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -115,10 +115,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1911 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1911 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -189,7 +189,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_part - Statistics: Num rows: 1600 Data size: 444727 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 157 Data size: 102975 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) Statistics: Num rows: 5 Data size: 1355 Basic stats: COMPLETE Column stats: PARTIAL @@ -380,19 +380,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_part_sdpo - Statistics: Num rows: 1600 Data size: 150327 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174 Data size: 66399 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 5 Data size: 1908 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ROW__ID (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1908 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1908 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -401,10 +401,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1908 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1908 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -475,7 +475,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_part_sdpo - Statistics: Num rows: 1600 Data size: 444727 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 169 Data size: 110820 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) Statistics: Num rows: 5 Data size: 1355 Basic stats: COMPLETE Column stats: PARTIAL @@ -675,19 +675,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_2l_part - Statistics: Num rows: 1600 Data size: 150327 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 154 Data size: 59496 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1931 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ROW__ID (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1931 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1931 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -696,10 +696,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), 11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1931 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1931 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -771,19 +771,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_2l_part - Statistics: Num rows: 3200 Data size: 313367 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1600 Data size: 156727 Basic stats: PARTIAL Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 455 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 489 Basic stats: PARTIAL Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct), hr (type: int) outputColumnNames: _col0, _col4 - Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 489 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 489 Basic stats: PARTIAL Column stats: PARTIAL value expressions: _col4 (type: int) Execution mode: llap LLAP IO: may be used (ACID table) @@ -793,10 +793,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), VALUE._col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 489 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 489 Basic stats: PARTIAL Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -897,7 +897,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_2l_part - Statistics: Num rows: 3200 Data size: 902167 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 1600 Data size: 451127 Basic stats: PARTIAL Column stats: PARTIAL Filter Operator predicate: (value = 'bar') (type: boolean) Statistics: Num rows: 5 Data size: 1409 Basic stats: PARTIAL Column stats: PARTIAL @@ -1095,19 +1095,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_2l_part_sdpo - Statistics: Num rows: 1600 Data size: 150327 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 154 Data size: 59496 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1931 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ROW__ID (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1931 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1931 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -1116,10 +1116,10 
@@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), 11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1931 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1931 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -1191,19 +1191,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_2l_part_sdpo - Statistics: Num rows: 3200 Data size: 313367 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1600 Data size: 156727 Basic stats: PARTIAL Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 455 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 489 Basic stats: PARTIAL Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct), hr (type: int) outputColumnNames: _col0, _col4 - Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 489 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator key expressions: '2008-04-08' (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct) sort order: ++++ Map-reduce partition columns: '2008-04-08' (type: string), _col4 (type: int) - Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 489 Basic stats: PARTIAL Column stats: PARTIAL Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -1212,11 +1212,11 @@ STAGE PLANS: Select Operator expressions: KEY._col0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' - Statistics: Num rows: 5 Data size: 1360 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 489 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 5 Data size: 1360 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 489 Basic stats: PARTIAL Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -1317,7 +1317,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_2l_part_sdpo - Statistics: Num rows: 3200 Data size: 902167 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 1600 Data size: 451127 Basic stats: PARTIAL Column stats: PARTIAL Filter Operator predicate: (value = 'bar') (type: boolean) Statistics: Num rows: 5 Data size: 1409 Basic stats: PARTIAL Column stats: PARTIAL @@ -1515,7 +1515,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_2l_part_sdpo_no_cp - Statistics: Num rows: 1600 Data size: 598664 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 95 Data size: 81408 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) 
Statistics: Num rows: 5 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL @@ -1613,19 +1613,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_2l_part_sdpo_no_cp - Statistics: Num rows: 3200 Data size: 1197144 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1600 Data size: 598664 Basic stats: PARTIAL Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1870 Basic stats: PARTIAL Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct), key (type: string), ds (type: string), hr (type: int) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1870 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct) sort order: ++++ Map-reduce partition columns: _col3 (type: string), _col4 (type: int) - Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1870 Basic stats: PARTIAL Column stats: PARTIAL value expressions: _col1 (type: string), 'bar' (type: string) Execution mode: llap LLAP IO: may be used (ACID table) @@ -1635,11 +1635,11 @@ STAGE PLANS: Select Operator expressions: KEY._col0 (type: struct), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' - Statistics: Num rows: 5 Data size: 3165 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1870 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 5 Data size: 3165 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1870 Basic stats: PARTIAL Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out index 352f6bad01..eeb6a7a0b5 100644 --- ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out +++ ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out @@ -3233,19 +3233,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_uami - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 239 Data size: 75112 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((de = 109.23) or (de = 119.23)) and enforce_constraint(vc is not null)) (type: boolean) - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1571 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ROW__ID (type: struct), i (type: int), vc (type: varchar(128)) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1571 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition 
columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1571 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col3 (type: varchar(128)) Execution mode: llap LLAP IO: may be used (ACID table) @@ -3255,10 +3255,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 3.14 (type: decimal(5,2)), VALUE._col1 (type: varchar(128)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1571 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1571 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -3326,19 +3326,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_uami - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 277 Data size: 86920 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((de = 3.14) and enforce_constraint(i is not null) and enforce_constraint(vc is not null)) (type: boolean) - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 313 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ROW__ID (type: struct), i (type: int), vc (type: varchar(128)) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 313 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 313 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col3 (type: varchar(128)) Execution mode: llap LLAP IO: may be used (ACID table) @@ -3348,10 +3348,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 3.14 (type: decimal(5,2)), VALUE._col1 (type: varchar(128)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 313 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 313 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out index d8863a2c80..1ef7b876d8 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out @@ -704,19 +704,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cmv_basetable_2 - Statistics: Num rows: 3 Data 
size: 348 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 42 Data size: 4872 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((c > 10.1) and a is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 1624 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), c (type: decimal(10,2)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 1624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 1624 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(10,2)) Execution mode: llap LLAP IO: may be used (ACID table) @@ -730,7 +730,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col2 - Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 23 Data size: 2668 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int), _col2 (type: decimal(10,2)) mode: hash @@ -982,19 +982,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cmv_basetable_2 - Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 42 Data size: 4872 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((c > 10.1) and a is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 1624 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), c (type: decimal(10,2)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 1624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 1624 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(10,2)) Execution mode: llap LLAP IO: may be used (ACID table) @@ -1008,7 +1008,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col2 - Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 23 Data size: 2668 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int), _col2 (type: decimal(10,2)) mode: hash diff --git ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out index d8863a2c80..1ef7b876d8 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out @@ -704,19 +704,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cmv_basetable_2 - Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 42 Data size: 4872 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((c > 10.1) and a is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 1624 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), c (type: decimal(10,2)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 1624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 1624 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(10,2)) Execution mode: llap LLAP IO: may be used (ACID table) @@ -730,7 +730,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col2 - Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 23 Data size: 2668 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int), _col2 (type: decimal(10,2)) mode: hash @@ -982,19 +982,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cmv_basetable_2 - Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 42 Data size: 4872 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((c > 10.1) and a is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 1624 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), c (type: decimal(10,2)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 1624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 1624 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(10,2)) Execution mode: llap LLAP IO: may be used (ACID table) @@ -1008,7 +1008,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col2 - Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 23 Data size: 2668 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int), _col2 (type: decimal(10,2)) mode: hash diff --git ql/src/test/results/clientpositive/llap/mm_all.q.out ql/src/test/results/clientpositive/llap/mm_all.q.out index 23e733b4c0..7c8a38f290 100644 --- ql/src/test/results/clientpositive/llap/mm_all.q.out +++ ql/src/test/results/clientpositive/llap/mm_all.q.out @@ -1623,10 +1623,10 @@ POSTHOOK: Output: default@multi1_mm@p=1 POSTHOOK: Output: default@multi1_mm@p=455 POSTHOOK: Output: default@multi1_mm@p=456 POSTHOOK: Output: default@multi1_mm@p=457 -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm 
PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: multi1_mm PARTITION(p=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] POSTHOOK: Lineage: multi1_mm PARTITION(p=455).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: multi1_mm PARTITION(p=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] @@ -1700,10 +1700,10 @@ POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@multi1_mm@p=1 -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] PREHOOK: query: select key, key2, p from multi1_mm order by key, key2, p PREHOOK: type: QUERY PREHOOK: Input: default@multi1_mm @@ -1815,7 +1815,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} numFiles 3 numRows 6 rawDataSize 13 @@ -1865,7 +1864,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} numFiles 6 numRows 12 rawDataSize 26 @@ -1923,7 +1921,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {} numFiles 55 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/materialized_view_create_rewrite_3.q.out ql/src/test/results/clientpositive/materialized_view_create_rewrite_3.q.out index 29e408c60c..65614566c9 100644 --- ql/src/test/results/clientpositive/materialized_view_create_rewrite_3.q.out +++ ql/src/test/results/clientpositive/materialized_view_create_rewrite_3.q.out @@ -710,19 +710,19 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 1205 Basic stats: COMPLETE Column stats: NONE TableScan alias: cmv_basetable_2 - Statistics: Num rows: 3 Data size: 727 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 23560 Basic stats: COMPLETE Column stats: NONE Filter Operator 
predicate: ((c > 10.1) and a is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 7853 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int), c (type: decimal(10,2)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 7853 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 7853 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(10,2)) Reduce Operator Tree: Join Operator @@ -732,12 +732,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col2 - Statistics: Num rows: 5 Data size: 1325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 8638 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int), _col2 (type: decimal(10,2)) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 1325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 8638 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -753,20 +753,20 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: decimal(10,2)) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: decimal(10,2)) - Statistics: Num rows: 5 Data size: 1325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 8638 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: decimal(10,2)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 4031 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 4031 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 4031 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1013,19 +1013,19 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 1205 Basic stats: COMPLETE Column stats: NONE TableScan alias: cmv_basetable_2 - Statistics: Num rows: 3 Data size: 727 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 23560 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((c > 10.1) and a is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 7853 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int), c (type: decimal(10,2)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 7853 Basic stats: COMPLETE Column stats: NONE Reduce 
Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 7853 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(10,2)) Reduce Operator Tree: Join Operator @@ -1035,12 +1035,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col2 - Statistics: Num rows: 5 Data size: 1325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 8638 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int), _col2 (type: decimal(10,2)) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 1325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 8638 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1056,20 +1056,20 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: decimal(10,2)) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: decimal(10,2)) - Statistics: Num rows: 5 Data size: 1325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 8638 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: decimal(10,2)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 4031 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 4031 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 4031 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/mm_all.q.out ql/src/test/results/clientpositive/mm_all.q.out index ac6c08057c..a0cbdb557b 100644 --- ql/src/test/results/clientpositive/mm_all.q.out +++ ql/src/test/results/clientpositive/mm_all.q.out @@ -1637,10 +1637,10 @@ POSTHOOK: Output: default@multi1_mm@p=1 POSTHOOK: Output: default@multi1_mm@p=455 POSTHOOK: Output: default@multi1_mm@p=456 POSTHOOK: Output: default@multi1_mm@p=457 -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: multi1_mm PARTITION(p=455).key SIMPLE 
[(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] POSTHOOK: Lineage: multi1_mm PARTITION(p=455).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: multi1_mm PARTITION(p=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] @@ -1829,7 +1829,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} numFiles 1 numRows 6 rawDataSize 13 @@ -1879,7 +1878,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} numFiles 2 numRows 12 rawDataSize 26 @@ -1937,7 +1935,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {} numFiles 1 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/mm_default.q.out ql/src/test/results/clientpositive/mm_default.q.out index 1345efdfb6..f5341ed1a6 100644 --- ql/src/test/results/clientpositive/mm_default.q.out +++ ql/src/test/results/clientpositive/mm_default.q.out @@ -180,7 +180,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {} numFiles 1 numRows 1 rawDataSize 3 diff --git ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out index 92a04ddbf3..114f9a8796 100644 --- ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out +++ ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out @@ -680,22 +680,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over10k_orc_bucketed - Statistics: Num rows: 2098 Data size: 622340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1227 Data size: 702030 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct) outputColumnNames: ROW__ID - Statistics: Num rows: 2098 Data size: 622340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1227 Data size: 702030 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: ROW__ID (type: struct) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1049 Data size: 88116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 613 Data size: 51492 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: _col0 (type: struct) - Statistics: Num rows: 1049 Data size: 88116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 613 Data size: 51492 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: @@ -704,13 +704,13 @@ STAGE PLANS: keys: KEY._col0 (type: struct) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1049 Data size: 88116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 613 Data size: 51492 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 349 Data size: 29316 Basic stats: COMPLETE Column stats: 
COMPLETE
+                    Statistics: Num rows: 204 Data size: 17136 Basic stats: COMPLETE Column stats: COMPLETE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 349 Data size: 29316 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 204 Data size: 17136 Basic stats: COMPLETE Column stats: COMPLETE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                           output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
index 7f18f2b42b..8d67fe4ab0 100644
--- ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
+++ ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
@@ -295,16 +295,16 @@ Stage-3
   Reducer 2
     File Output Operator [FS_8]
       table:{"name:":"default.acid_uami"}
-      Select Operator [SEL_4] (rows=1/2 width=328)
+      Select Operator [SEL_4] (rows=10/2 width=321)
        Output:["_col0","_col1","_col2","_col3"]
      <-Map 1 [SIMPLE_EDGE]
        SHUFFLE [RS_3]
          PartitionCols:UDFToInteger(_col0)
-          Select Operator [SEL_2] (rows=1/2 width=328)
+          Select Operator [SEL_2] (rows=10/2 width=321)
            Output:["_col0","_col1","_col3"]
-            Filter Operator [FIL_9] (rows=1/2 width=328)
+            Filter Operator [FIL_9] (rows=10/2 width=321)
              predicate:((de = 109.23) or (de = 119.23))
-              TableScan [TS_0] (rows=1/4 width=328)
+              TableScan [TS_0] (rows=49/4 width=321)
                default@acid_uami,acid_uami, ACID table,Tbl:COMPLETE,Col:NONE,Output:["i","de","vc"]
 
PREHOOK: query: select * from acid_uami order by de
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
index 59190893e6..78ea01d968 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
@@ -270,6 +270,7 @@ public static void clearColumnStatsState(Map<String, String> params) {
     if (params == null) {
       return;
     }
+
     ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
     stats.columnStats.clear();
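COLUMN_STATS_ACCURATE is stored as a JSON blob in the table or partition parameters; the golden-file changes above show it shrinking from values like {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} down to {} as stats get invalidated. The toy helper below (Jackson-based, and emphatically not the real parseStatsAcc() implementation) only illustrates what clearing columnStats does to that blob:

    import com.fasterxml.jackson.databind.ObjectMapper;
    import com.fasterxml.jackson.databind.node.ObjectNode;

    public class ColumnStatsBlobSketch {
      private static final ObjectMapper MAPPER = new ObjectMapper();

      // Toy equivalent of parseStatsAcc(...) followed by
      // stats.columnStats.clear(): drops the per-column accuracy map while
      // leaving any basic-stats flag untouched.
      public static String clearColumnStats(String blob) throws Exception {
        ObjectNode node = (blob == null || blob.isEmpty())
            ? MAPPER.createObjectNode()
            : (ObjectNode) MAPPER.readTree(blob);
        node.remove("COLUMN_STATS");
        return MAPPER.writeValueAsString(node);
      }
    }
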
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
index 89354a2d34..0dd3eb1017 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
@@ -297,7 +297,7 @@ public void alterTable(RawStore msdb, Warehouse wh, String dbname,
         !isPartitionedTable) {
       Database db = msdb.getDatabase(newDbName);
       // Update table stats. For partitioned table, we update stats in alterPartition()
-      MetaStoreUtils.updateTableStatsFast(db, newt, wh, false, true, environmentContext);
+      MetaStoreUtils.updateTableStatsFast(db, newt, wh, false, true, environmentContext, false);
     }

     if (isPartitionedTable) {
@@ -436,23 +436,25 @@ public Partition alterPartition(final RawStore msdb, Warehouse wh, final String
           .currentTimeMillis() / 1000));
     }

-    Table tbl = msdb.getTable(dbname, name);
-    if (tbl == null) {
-      throw new InvalidObjectException(
-          "Unable to alter partition because table or database does not exist.");
-    }
     //alter partition
     if (part_vals == null || part_vals.size() == 0) {
       try {
         msdb.openTransaction();
+
+        Table tbl = msdb.getTable(dbname, name);
+        if (tbl == null) {
+          throw new InvalidObjectException(
+              "Unable to alter partition because table or database does not exist.");
+        }
         oldPart = msdb.getPartition(dbname, name, new_part.getValues());
         if (MetaStoreUtils.requireCalStats(oldPart, new_part, tbl, environmentContext)) {
           // if stats are same, no need to update
           if (MetaStoreUtils.isFastStatsSame(oldPart, new_part)) {
             MetaStoreUtils.updateBasicState(environmentContext, new_part.getParameters());
           } else {
-            MetaStoreUtils.updatePartitionStatsFast(new_part, wh, false, true, environmentContext);
+            MetaStoreUtils.updatePartitionStatsFast(
+                new_part, tbl, wh, false, true, environmentContext, false);
           }
         }
@@ -494,6 +496,11 @@ public Partition alterPartition(final RawStore msdb, Warehouse wh, final String
     boolean dataWasMoved = false;
     try {
       msdb.openTransaction();
+      Table tbl = msdb.getTable(dbname, name);
+      if (tbl == null) {
+        throw new InvalidObjectException(
+            "Unable to alter partition because table or database does not exist.");
+      }
       try {
         oldPart = msdb.getPartition(dbname, name, part_vals);
       } catch (NoSuchObjectException e) {
@@ -581,7 +588,8 @@ public Partition alterPartition(final RawStore msdb, Warehouse wh, final String
       }

       if (MetaStoreUtils.requireCalStats(oldPart, new_part, tbl, environmentContext)) {
-        MetaStoreUtils.updatePartitionStatsFast(new_part, wh, false, true, environmentContext);
+        MetaStoreUtils.updatePartitionStatsFast(
+            new_part, tbl, wh, false, true, environmentContext, false);
       }

       String newPartName = Warehouse.makePartName(tbl.getPartitionKeys(), new_part.getValues());
@@ -650,15 +658,16 @@ public Partition alterPartition(final RawStore msdb, Warehouse wh, final String
       transactionalListeners = handler.getTransactionalListeners();
     }

-    Table tbl = msdb.getTable(dbname, name);
-    if (tbl == null) {
-      throw new InvalidObjectException(
-          "Unable to alter partitions because table or database does not exist.");
-    }
     boolean success = false;
     try {
       msdb.openTransaction();
+
+      Table tbl = msdb.getTable(dbname, name);
+      if (tbl == null) {
+        throw new InvalidObjectException(
+            "Unable to alter partitions because table or database does not exist.");
+      }
       for (Partition tmpPart: new_parts) {
         // Set DDL time to now if not specified
         if (tmpPart.getParameters() == null ||
@@ -677,7 +686,8 @@ public Partition alterPartition(final RawStore msdb, Warehouse wh, final String
           if (MetaStoreUtils.isFastStatsSame(oldTmpPart, tmpPart)) {
             MetaStoreUtils.updateBasicState(environmentContext, tmpPart.getParameters());
           } else {
-            MetaStoreUtils.updatePartitionStatsFast(tmpPart, wh, false, true, environmentContext);
+            MetaStoreUtils.updatePartitionStatsFast(
+                tmpPart, tbl, wh, false, true, environmentContext, false);
           }
         }
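Every HiveAlterHandler hunk above makes the same move: the table lookup and null check migrate inside the RawStore transaction that performs the alter, so the existence check and the mutation see one consistent snapshot. A minimal, self-contained sketch of that pattern (Store and AlterFailedException are stand-ins, not Hive types):

```java
interface Store {
  void openTransaction();
  boolean commitTransaction();
  void rollbackTransaction();
  Object getTable(String db, String name); // null if the table is missing
}

class AlterFailedException extends RuntimeException {
  AlterFailedException(String msg) { super(msg); }
}

class TxnReadPattern {
  static void alterWithConsistentRead(Store store, String db, String name) {
    boolean success = false;
    try {
      store.openTransaction();
      Object tbl = store.getTable(db, name); // read inside the transaction
      if (tbl == null) {
        throw new AlterFailedException(
            "Unable to alter partition because table or database does not exist.");
      }
      // ... mutate partitions using tbl ...
      success = store.commitTransaction();
    } finally {
      if (!success) {
        store.rollbackTransaction();
      }
    }
  }
}
```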
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 662de9a667..c99c3afc09 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -1488,7 +1488,7 @@ private void create_table_core(final RawStore ms, final Table tbl,
         }
         if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) &&
             !MetaStoreUtils.isView(tbl)) {
-          MetaStoreUtils.updateTableStatsFast(db, tbl, wh, madeDir, envContext);
+          MetaStoreUtils.updateTableStatsFast(db, tbl, wh, madeDir, false, envContext, true);
         }

         // set create time
@@ -2673,7 +2673,7 @@ private Partition append_partition_common(RawStore ms, String dbName, String tab
         if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) &&
             !MetaStoreUtils.isView(tbl)) {
-          MetaStoreUtils.updatePartitionStatsFast(part, wh, madeDir, envContext);
+          MetaStoreUtils.updatePartitionStatsFast(part, tbl, wh, madeDir, false, envContext, true);
         }

         if (ms.addPartition(part)) {
@@ -3241,7 +3241,7 @@ private void initializeAddedPartition(
         final Table tbl, final PartitionSpecProxy.PartitionIterator part, boolean madeDir) throws MetaException {
       if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) &&
           !MetaStoreUtils.isView(tbl)) {
-        MetaStoreUtils.updatePartitionStatsFast(part, wh, madeDir, false, null);
+        MetaStoreUtils.updatePartitionStatsFast(part, tbl, wh, madeDir, false, null, true);
       }

       // set create time
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
index 20c10607bb..445a7b8ad2 100755
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
@@ -543,7 +543,7 @@ public static String makePartName(List<FieldSchema> partCols,
    * @return array of FileStatus objects corresponding to the files
    * making up the passed storage description
    */
-  public FileStatus[] getFileStatusesForSD(StorageDescriptor desc)
+  public List<FileStatus> getFileStatusesForSD(StorageDescriptor desc)
       throws MetaException {
     return getFileStatusesForLocation(desc.getLocation());
   }
@@ -553,7 +553,7 @@ public static String makePartName(List<FieldSchema> partCols,
    * @return array of FileStatus objects corresponding to the files
    * making up the passed storage description
    */
-  public FileStatus[] getFileStatusesForLocation(String location)
+  public List<FileStatus> getFileStatusesForLocation(String location)
       throws MetaException {
     try {
       Path path = new Path(location);
@@ -571,7 +571,7 @@ public static String makePartName(List<FieldSchema> partCols,
    * @return array of FileStatus objects corresponding to the files making up the passed
    * unpartitioned table
    */
-  public FileStatus[] getFileStatusesForUnpartitionedTable(Database db, Table table)
+  public List<FileStatus> getFileStatusesForUnpartitionedTable(Database db, Table table)
       throws MetaException {
     Path tablePath = getDnsPath(new Path(table.getSd().getLocation()));
     try {
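With the Warehouse accessors above returning List<FileStatus> rather than FileStatus[], call sites change shape but not logic. A hypothetical caller, only to illustrate consuming the new return type (totalDataSize is not in the patch):

```java
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.MetaException;

public class WarehouseListingSketch {
  // Sums file sizes at a location, skipping directories the same way
  // populateQuickStats does.
  static long totalDataSize(Warehouse wh, String location) throws MetaException {
    long total = 0L;
    for (FileStatus status : wh.getFileStatusesForLocation(location)) {
      if (!status.isDirectory()) {
        total += status.getLen();
      }
    }
    return total;
  }
}
```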
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java
index b44ff8ce47..4138fa5b70 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.metastore.utils;

+import org.apache.curator.shaded.com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.FileStatus;
@@ -314,11 +315,11 @@ public static String unescapePathName(String path) {
    * @return array of FileStatus
    * @throws IOException
    */
-  public static FileStatus[] getFileStatusRecurse(Path path, int level, FileSystem fs)
+  public static List<FileStatus> getFileStatusRecurse(Path path, int level, FileSystem fs)
       throws IOException {

     // if level is <0, the return all files/directories under the specified path
-    if ( level < 0) {
+    if (level < 0) {
       List<FileStatus> result = new ArrayList<>();
       try {
         FileStatus fileStatus = fs.getFileStatus(path);
@@ -328,9 +329,9 @@ public static String unescapePathName(String path) {
         // does not exist. But getFileStatus() throw IOException. To mimic the
         // similar behavior we will return empty array on exception. For external
         // tables, the path of the table will not exists during table creation
-        return new FileStatus[0];
+        return new ArrayList<>(0);
       }
-      return result.toArray(new FileStatus[result.size()]);
+      return result;
     }

     // construct a path pattern (e.g., /*/*) to find all dynamically generated paths
@@ -339,7 +340,7 @@ public static String unescapePathName(String path) {
       sb.append(Path.SEPARATOR).append("*");
     }
     Path pathPattern = new Path(path, sb.toString());
-    return fs.globStatus(pathPattern, FileUtils.HIDDEN_FILES_PATH_FILTER);
+    return Lists.newArrayList(fs.globStatus(pathPattern, FileUtils.HIDDEN_FILES_PATH_FILTER));
   }

   /**
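The pattern construction inside getFileStatusRecurse above is easy to miss in diff form: for a non-negative level it appends one wildcard per directory level and globs the result. Isolated into a runnable sketch (dynamicPartitionPattern is a hypothetical name; the loop body is taken from the method):

```java
import org.apache.hadoop.fs.Path;

public class GlobPatternSketch {
  static Path dynamicPartitionPattern(Path path, int level) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < level; i++) {
      sb.append(Path.SEPARATOR).append("*"); // one wildcard per partition level
    }
    return new Path(path, sb.toString());
  }

  public static void main(String[] args) {
    // A two-level partitioned table globs <root>/*/*
    System.out.println(dynamicPartitionPattern(new Path("/warehouse/t"), 2));
  }
}
```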
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
index b051961442..d3439112c4 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
@@ -606,20 +606,16 @@ public static boolean isFastStatsSame(Partition oldPart, Partition newPart) {
     return false;
   }

-  public static boolean updateTableStatsFast(Database db, Table tbl, Warehouse wh,
-      boolean madeDir, EnvironmentContext environmentContext) throws MetaException {
-    return updateTableStatsFast(db, tbl, wh, madeDir, false, environmentContext);
-  }
-
-  public static boolean updateTableStatsFast(Database db, Table tbl, Warehouse wh,
-      boolean madeDir, boolean forceRecompute, EnvironmentContext environmentContext) throws MetaException {
-    if (tbl.getPartitionKeysSize() == 0) {
-      // Update stats only when unpartitioned
-      FileStatus[] fileStatuses = wh.getFileStatusesForUnpartitionedTable(db, tbl);
-      return updateTableStatsFast(tbl, fileStatuses, madeDir, forceRecompute, environmentContext);
-    } else {
-      return false;
-    }
+  public static boolean updateTableStatsFast(Database db, Table tbl, Warehouse wh, boolean madeDir,
+      boolean forceRecompute, EnvironmentContext environmentContext, boolean isCreate) throws MetaException {
+    if (tbl.getPartitionKeysSize() != 0) return false;
+    // Update stats only when unpartitioned
+    // TODO: this is also invalid for ACID tables, except for the create case by coincidence;
+    //       because the methods in metastore get all the files in the table directory without
+    //       regard for ACID state.
+    List<FileStatus> fileStatuses = wh.getFileStatusesForUnpartitionedTable(db, tbl);
+    return updateTableStatsFast(
+        tbl, fileStatuses, madeDir, forceRecompute, environmentContext, isCreate);
   }

   /**
@@ -632,8 +628,9 @@ public static boolean updateTableStatsFast(Database db, Table tbl, Warehouse wh,
    * these parameters set
    * @return true if the stats were updated, false otherwise
    */
-  public static boolean updateTableStatsFast(Table tbl, FileStatus[] fileStatus, boolean newDir,
-      boolean forceRecompute, EnvironmentContext environmentContext) throws MetaException {
+  private static boolean updateTableStatsFast(Table tbl, List<FileStatus> fileStatus,
+      boolean newDir, boolean forceRecompute, EnvironmentContext environmentContext,
+      boolean isCreate) throws MetaException {

     Map<String, String> params = tbl.getParameters();
@@ -646,39 +643,43 @@ public static boolean updateTableStatsFast(Table tbl, FileStatus[] fileStatus, b
       }
     }

-    boolean updated = false;
-    if (forceRecompute ||
-        params == null ||
-        !containsAllFastStats(params)) {
-      if (params == null) {
-        params = new HashMap<>();
-      }
-      if (!newDir) {
-        // The table location already exists and may contain data.
-        // Let's try to populate those stats that don't require full scan.
-        LOG.info("Updating table stats fast for " + tbl.getTableName());
-        populateQuickStats(fileStatus, params);
-        LOG.info("Updated size of table " + tbl.getTableName() +" to "+ params.get(StatsSetupConst.TOTAL_SIZE));
-        if (environmentContext != null
-            && environmentContext.isSetProperties()
-            && StatsSetupConst.TASK.equals(environmentContext.getProperties().get(
-                StatsSetupConst.STATS_GENERATED))) {
-          StatsSetupConst.setBasicStatsState(params, StatsSetupConst.TRUE);
-        } else {
-          StatsSetupConst.setBasicStatsState(params, StatsSetupConst.FALSE);
-        }
-      }
+    if (!forceRecompute && params != null && containsAllFastStats(params)) return false;
+    if (params == null) {
+      params = new HashMap<>();
+    }
+    if (!isCreate && MetaStoreUtils.isTransactionalTable(tbl.getParameters())) {
+      // TODO: we should use AcidUtils.getAcidFilesForStats, but cannot access it from metastore.
+      LOG.warn("Not updating fast stats for a transactional table " + tbl.getTableName());
       tbl.setParameters(params);
-      updated = true;
+      return true;
     }
-    return updated;
+    if (!newDir) {
+      // The table location already exists and may contain data.
+      // Let's try to populate those stats that don't require full scan.
+      LOG.info("Updating table stats fast for " + tbl.getTableName());
+      populateQuickStats(fileStatus, params);
+      LOG.info("Updated size of table " + tbl.getTableName() +" to "+ params.get(StatsSetupConst.TOTAL_SIZE));
+      if (environmentContext != null
+          && environmentContext.isSetProperties()
+          && StatsSetupConst.TASK.equals(environmentContext.getProperties().get(
+              StatsSetupConst.STATS_GENERATED))) {
+        StatsSetupConst.setBasicStatsState(params, StatsSetupConst.TRUE);
+      } else {
+        StatsSetupConst.setBasicStatsState(params, StatsSetupConst.FALSE);
+      }
+    }
+    tbl.setParameters(params);
+    return true;
   }

-  public static void populateQuickStats(FileStatus[] fileStatus, Map<String, String> params) {
+  /** This method is invalid for MM and ACID tables unless fileStatus comes from AcidUtils. */
+  public static void populateQuickStats(List<FileStatus> fileStatus, Map<String, String> params) {
+    // Why is this even in metastore?
+    LOG.trace("Populating quick stats based on {} files", fileStatus.size());
     int numFiles = 0;
     long tableSize = 0L;
     for (FileStatus status : fileStatus) {
-      // don't take directories into account for quick stats
+      // don't take directories into account for quick stats TODO: wtf?
       if (!status.isDir()) {
         tableSize += status.getLen();
         numFiles += 1;
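Read end to end, the refactored updateTableStatsFast reduces to a short guard ladder. The following paraphrase captures when a raw file listing is allowed to feed quick stats (the names are mine, not Hive's):

```java
public class FastStatsDecisionSketch {
  static boolean shouldPopulateQuickStats(boolean forceRecompute, boolean hasAllFastStats,
      boolean isTransactional, boolean isCreate, boolean locationIsNew) {
    if (!forceRecompute && hasAllFastStats) {
      return false; // stats already complete and no recompute requested
    }
    if (!isCreate && isTransactional) {
      return false; // a plain directory listing ignores ACID state, so it would overcount
    }
    return !locationIsNew; // a freshly created directory has nothing to count yet
  }
}
```

The isCreate exemption works because a table or partition being created contains exactly the files it was created with, so even an ACID-oblivious listing is correct at that instant; afterwards, obsolete base and delta files would pollute the counts.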
@@ -687,6 +688,12 @@ public static void populateQuickStats(FileStatus[] fileStatus, Map<String, String> params) {
     }
   }

+  public static void clearQuickStats(Map<String, String> params) {
+    params.remove(StatsSetupConst.NUM_FILES);
+    params.remove(StatsSetupConst.TOTAL_SIZE);
+  }
+
   public static boolean areSameColumns(List<FieldSchema> oldCols, List<FieldSchema> newCols) {
     return ListUtils.isEqualList(oldCols, newCols);
@@ -707,16 +714,6 @@ public static void updateBasicState(EnvironmentContext environmentContext,
     Map<String, String> params = part.getParameters();

-    boolean updated = false;
-    if (forceRecompute ||
-        params == null ||
-        !containsAllFastStats(params)) {
-      if (params == null) {
-        params = new HashMap<>();
-      }
-      if (!madeDir) {
-        // The partition location already existed and may contain data. Lets try to
-        // populate those statistics that don't require a full scan of the data.
-        LOG.warn("Updating partition stats fast for: " + part.getTableName());
-        FileStatus[] fileStatus = wh.getFileStatusesForLocation(part.getLocation());
-        populateQuickStats(fileStatus, params);
-        LOG.warn("Updated size to " + params.get(StatsSetupConst.TOTAL_SIZE));
-        updateBasicState(environmentContext, params);
-      }
+    if (!forceRecompute && params != null && containsAllFastStats(params)) return false;
+    if (params == null) {
+      params = new HashMap<>();
+    }
+    if (!isCreate && MetaStoreUtils.isTransactionalTable(table.getParameters())) {
+      // TODO: implement?
+      LOG.warn("Not updating fast stats for a transactional table " + table.getTableName());
       part.setParameters(params);
-      updated = true;
+      return true;
     }
-    return updated;
+    if (!madeDir) {
+      // The partition location already existed and may contain data. Lets try to
+      // populate those statistics that don't require a full scan of the data.
+      LOG.warn("Updating partition stats fast for: " + part.getTableName());
+      List<FileStatus> fileStatus = wh.getFileStatusesForLocation(part.getLocation());
+      // TODO: this is invalid for ACID tables, and we cannot access AcidUtils here.
+      populateQuickStats(fileStatus, params);
+      LOG.warn("Updated size to " + params.get(StatsSetupConst.TOTAL_SIZE));
+      updateBasicState(environmentContext, params);
+    }
+    part.setParameters(params);
+    return true;
   }

   /*
@@ -792,6 +793,12 @@ public static boolean columnsIncludedByNameType(List<FieldSchema> oldCols,
   }

   /** Duplicates AcidUtils; used in a couple places in metastore. */
+  public static boolean isTransactionalTable(Map<String, String> params) {
+    String transactionalProp = params.get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL);
+    return (transactionalProp != null && "true".equalsIgnoreCase(transactionalProp));
+  }
+
+  /** Duplicates AcidUtils; used in a couple places in metastore. */
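The new isTransactionalTable helper is a plain parameter-map check, so it can be exercised without a metastore. A self-contained sketch (the literal value "transactional" is an assumption about what hive_metastoreConstants.TABLE_IS_TRANSACTIONAL resolves to):

```java
import java.util.HashMap;
import java.util.Map;

public class TransactionalCheckSketch {
  // Assumed literal value of hive_metastoreConstants.TABLE_IS_TRANSACTIONAL.
  static final String TABLE_IS_TRANSACTIONAL = "transactional";

  static boolean isTransactionalTable(Map<String, String> params) {
    String transactionalProp = params.get(TABLE_IS_TRANSACTIONAL);
    return transactionalProp != null && "true".equalsIgnoreCase(transactionalProp);
  }

  public static void main(String[] args) {
    Map<String, String> params = new HashMap<>();
    params.put(TABLE_IS_TRANSACTIONAL, "TRUE");
    System.out.println(isTransactionalTable(params)); // true: the check is case-insensitive
  }
}
```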
   public static boolean isInsertOnlyTableParam(Map<String, String> params) {
     String transactionalProp = params.get(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES);
     return (transactionalProp != null && "insert_only".equalsIgnoreCase(transactionalProp));
diff --git standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
index 2599ab103e..7091c5b2f5 100644
--- standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
+++ standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
@@ -489,7 +489,7 @@ private static Partition makePartitionObject(String dbName, String tblName,
     part4.setSd(tbl.getSd().deepCopy());
     part4.getSd().setSerdeInfo(tbl.getSd().getSerdeInfo().deepCopy());
     part4.getSd().setLocation(tbl.getSd().getLocation() + ptnLocationSuffix);
-    MetaStoreUtils.updatePartitionStatsFast(part4, warehouse, null);
+    MetaStoreUtils.updatePartitionStatsFast(part4, tbl, warehouse, false, false, null, true);
     return part4;
   }
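For reference, the updated test call above maps onto the new seven-argument signature like this (the annotations are editorial, derived from the parameter names in the MetaStoreUtils hunks):

```java
MetaStoreUtils.updatePartitionStatsFast(
    part4,      // partition whose quick stats are computed
    tbl,        // owning table, consulted by the transactional-table guard
    warehouse,  // used to list files under the partition location
    false,      // madeDir: the location already existed
    false,      // forceRecompute: reuse fast stats if already complete
    null,       // environmentContext: no STATS_GENERATED marker
    true);      // isCreate: creation path, so the raw listing is trustworthy
```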