commit 49a3b57fda56a4103e5be4120ce0a58031bc89ce Author: Andrew Sherman Date: Tue Jun 5 17:22:00 2018 -0700 HIVE-18118: provide supportability support for Erasure Coding Update number of Erasure Coded Files in a directory as part of Basic (aka Quick) Stats This information is then (mostly) available through 'EXPLAIN EXTENDED' and 'DESCRIBE EXTENDED' Extend the MiniHS2 Builder to allow configuring the number of datanodes. Add a jdbc MiniHS2/Spark test that uses Erasure Coding. There are some change to StatsSetupConst to make checkstyle happy. diff --git common/src/java/org/apache/hive/common/util/HiveStringUtils.java common/src/java/org/apache/hive/common/util/HiveStringUtils.java index cfe9b2208a60586a05d293f222aa90b37e9a06ac..6b14ad95494173467f0496055385cdd8178c1277 100644 --- common/src/java/org/apache/hive/common/util/HiveStringUtils.java +++ common/src/java/org/apache/hive/common/util/HiveStringUtils.java @@ -31,19 +31,15 @@ import java.util.Arrays; import java.util.Collection; import java.util.Date; +import java.util.HashMap; import java.util.Iterator; import java.util.List; -import java.util.Map; -import java.util.HashMap; import java.util.Locale; -import java.util.Properties; +import java.util.Map; import java.util.StringTokenizer; import java.util.regex.Pattern; import com.google.common.base.Splitter; -import com.google.common.collect.Interner; -import com.google.common.collect.Interners; - import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.text.translate.CharSequenceTranslator; import org.apache.commons.lang3.text.translate.EntityArrays; @@ -1062,19 +1058,6 @@ public static String normalizeIdentifier(String identifier) { return identifier.trim().toLowerCase(); } - public static Map getPropertiesExplain(Properties properties) { - if (properties != null) { - String value = properties.getProperty("columns.comments"); - if (value != null) { - // should copy properties first - Map clone = new HashMap(properties); - clone.put("columns.comments", quoteComments(value)); - return clone; - } - } - return properties; - } - public static String quoteComments(String value) { char[] chars = value.toCharArray(); if (!commentProvided(chars)) { diff --git itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java index d7d7097336fc6be4c2f7a35cd6897e0375486e81..7ef2ced7b1f107ba21fbbb646a434018aeb02758 100644 --- itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java +++ itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java @@ -1658,7 +1658,7 @@ public void testDescribe() throws Exception { /** * Get Detailed Table Information via jdbc */ - private String getDetailedTableDescription(Statement stmt, String table) throws SQLException { + static String getDetailedTableDescription(Statement stmt, String table) throws SQLException { String extendedDescription = null; try (ResultSet rs = stmt.executeQuery("describe extended " + table)) { while (rs.next()) { diff --git itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2ErasureCoding.java itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2ErasureCoding.java new file mode 100644 index 0000000000000000000000000000000000000000..b0a0145a4ee705b0a7d8f214d2c87397f731faec --- /dev/null +++ itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2ErasureCoding.java @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + 
* or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.jdbc; + +import java.io.IOException; +import java.nio.file.Paths; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Collections; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.shims.HadoopShims; +import org.apache.hadoop.hive.shims.HadoopShims.HdfsErasureCodingShim; +import org.apache.hadoop.hive.shims.HadoopShims.MiniDFSShim; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hive.jdbc.miniHS2.MiniHS2; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import static org.apache.hadoop.hive.ql.QTestUtil.DEFAULT_TEST_EC_POLICY; +import static org.apache.hive.jdbc.TestJdbcWithMiniHS2.getDetailedTableDescription; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * Run erasure Coding tests with jdbc. + */ +public class TestJdbcWithMiniHS2ErasureCoding { + private static final String DB_NAME = "ecTestDb"; + private static MiniHS2 miniHS2 = null; + private static HiveConf conf; + private Connection hs2Conn = null; + + private static HiveConf createHiveOnSparkConf() { + HiveConf hiveConf = new HiveConf(); + // Tell dfs not to consider load when choosing a datanode as this can cause failure as + // in a test we do not have spare datanode capacity. + hiveConf.setBoolean("dfs.namenode.redundancy.considerLoad", false); + hiveConf.set("hive.execution.engine", "spark"); + hiveConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + hiveConf.set("spark.master", "local-cluster[2,2,1024]"); + hiveConf.set("hive.spark.client.connect.timeout", "30000ms"); + hiveConf.set("spark.local.dir", + Paths.get(System.getProperty("test.tmp.dir"), "TestJdbcWithMiniHS2ErasureCoding-local-dir") + .toString()); + hiveConf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, false); // avoid ZK errors + return hiveConf; + } + + /** + * Setup a mini HS2 with miniMR. + */ + @BeforeClass + public static void beforeTest() throws Exception { + Class.forName(MiniHS2.getJdbcDriverName()); + conf = createHiveOnSparkConf(); + DriverManager.setLoginTimeout(0); + miniHS2 = new MiniHS2.Builder() + .withConf(conf) + .withMiniMR() + .withDataNodes(5) // sufficient for RS-3-2-1024k + .build(); + miniHS2.start(Collections.emptyMap()); + createDb(); + MiniDFSShim dfs = miniHS2.getDfs(); + addErasurePolicy(dfs, "hdfs:///", DEFAULT_TEST_EC_POLICY); + } + + /** + * Shutdown the mini HS2. 
+ */ + @AfterClass + public static void afterTest() { + if (miniHS2 != null && miniHS2.isStarted()) { + miniHS2.stop(); + } + } + + /** + * Setup a connection to the test database before each test. + */ + @Before + public void setUp() throws Exception { + hs2Conn = DriverManager.getConnection(miniHS2.getJdbcURL(DB_NAME), + System.getProperty("user.name"), "bar"); + } + + /** + * Close connection after each test. + */ + @After + public void tearDown() throws Exception { + if (hs2Conn != null) { + hs2Conn.close(); + } + } + + /** + * Create a database. + */ + private static void createDb() throws Exception { + try (Connection conn = DriverManager.getConnection(miniHS2.getJdbcURL(), + System.getProperty("user.name"), "bar"); + Statement stmt2 = conn.createStatement()) { + stmt2.execute("DROP DATABASE IF EXISTS " + DB_NAME + " CASCADE"); + stmt2.execute("CREATE DATABASE " + DB_NAME); + } + } + + /** + * Test EXPLAIN on fs with Erasure Coding. + */ + @Test + public void testExplainErasureCoding() throws Exception { + try (Statement stmt = hs2Conn.createStatement()) { + String tableName = "pTableEc"; + stmt.execute( + " CREATE TABLE " + tableName + " (userid VARCHAR(64), link STRING, source STRING) " + + "PARTITIONED BY (datestamp STRING, i int) " + + "CLUSTERED BY (userid) INTO 4 BUCKETS STORED AS PARQUET"); + // insert data to create 2 partitions + stmt.execute("INSERT INTO TABLE " + tableName + + " PARTITION (datestamp = '2014-09-23', i = 1)(userid,link) VALUES ('jsmith', 'mail.com')"); + stmt.execute("INSERT INTO TABLE " + tableName + + " PARTITION (datestamp = '2014-09-24', i = 2)(userid,link) VALUES ('mac', 'superchunk.com')"); + String explain = getExtendedExplain(stmt, "select userid from " + tableName); + assertMatchAndCount(explain, " numFiles 4", 2); + assertMatchAndCount(explain, " numFilesErasureCoded 4", 2); + } + } + + /** + * Test DESCRIBE on fs with Erasure Coding. + */ + @Test + public void testDescribeErasureCoding() throws Exception { + try (Statement stmt = hs2Conn.createStatement()) { + String table = "pageviews"; + stmt.execute(" CREATE TABLE " + table + " (userid VARCHAR(64), link STRING, source STRING) " + + "PARTITIONED BY (datestamp STRING, i int) CLUSTERED BY (userid) INTO 4 BUCKETS STORED AS PARQUET"); + stmt.execute("INSERT INTO TABLE " + table + " PARTITION (datestamp = '2014-09-23', i = 1)" + + "(userid,link) VALUES ('jsmith', 'mail.com')"); + stmt.execute("INSERT INTO TABLE " + table + " PARTITION (datestamp = '2014-09-24', i = 1)" + + "(userid,link) VALUES ('dpatel', 'gmail.com')"); + String description = getDetailedTableDescription(stmt, table); + assertMatchAndCount(description, "numFiles=8", 1); + assertMatchAndCount(description, "numFilesErasureCoded=8", 1); + assertMatchAndCount(description, "numPartitions=2", 1); + } + } + + /** + * Add a Erasure Coding Policy to a Path. + */ + private static void addErasurePolicy(MiniDFSShim dfs, String pathString, String policyName) throws IOException { + HadoopShims hadoopShims = ShimLoader.getHadoopShims(); + HdfsErasureCodingShim erasureCodingShim = hadoopShims.createHdfsErasureCodingShim(dfs.getFileSystem(), conf); + erasureCodingShim.enableErasureCodingPolicy(policyName); + Path fsRoot = new Path(pathString); + erasureCodingShim.setErasureCodingPolicy(fsRoot, policyName); + HadoopShims.HdfsFileErasureCodingPolicy erasureCodingPolicy = + erasureCodingShim.getErasureCodingPolicy(fsRoot); + assertEquals(policyName, erasureCodingPolicy.getName()); + } + + /** + * Get Extended Explain output via jdbc. 
+ */ + private static String getExtendedExplain(Statement stmt, String query) throws SQLException { + StringBuilder sb = new StringBuilder(2048); + try (ResultSet rs = stmt.executeQuery("explain extended " + query)) { + while (rs.next()) { + sb.append(rs.getString(1)).append('\n'); + } + } + return sb.toString(); + } + + /** + * Check that the expected string occurs correctly in the output string. + * @param output string to probe + * @param expectedString string to find in output + * @param expectedCount the expected number of occurrences of the expected string + */ + private void assertMatchAndCount(String output, String expectedString, int expectedCount) { + assertTrue("Did not find expected '" + expectedString + "' in text " + + output, output.contains(expectedString)); + assertEquals("wrong count of matches of '" + expectedString + "' in text " + + output, expectedCount, StringUtils.countMatches(output, expectedString)); + } +} diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index aeb6211f5a11f6b9466d731cccb3e55cb03281cb..d2d2057aad4e9d0fd0e26442497a2965358fc1dd 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -1695,4 +1695,5 @@ druid.kafka.query.files=druidkafkamini_basic.q erasurecoding.shared.query.files=erasure_commands.q # tests to be run only by TestErasureCodingHDFSCliDriver -erasurecoding.only.query.files=erasure_simple.q +erasurecoding.only.query.files=erasure_simple.q,\ + erasure_explain.q diff --git itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java index 2106fec7af75958644eb831498b725b771ddf47a..95f033a0cd9fb121e90c241e94877eb4d77669d7 100644 --- itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java +++ itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java @@ -153,9 +153,9 @@ public static final String TEST_HIVE_USER_PROPERTY = "test.hive.user"; /** - * The Erasure Coding Policy to use in TestErasureCodingHDFSCliDriver. + * The default Erasure Coding Policy to use in Erasure Coding tests. 
*/ - private static final String DEFAULT_TEST_EC_POLICY = "RS-3-2-1024k"; + public static final String DEFAULT_TEST_EC_POLICY = "RS-3-2-1024k"; private String testWarehouse; private final String testFiles; diff --git itests/util/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java itests/util/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java index 1700c08d3f37285de43b5d4fe5c77ef55c170235..a78dd739b13482a8bb0bdd779ca06f70990972cf 100644 --- itests/util/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java +++ itests/util/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java @@ -66,6 +66,7 @@ private static final FsPermission FULL_PERM = new FsPermission((short)00777); private static final FsPermission WRITE_ALL_PERM = new FsPermission((short)00733); private static final String tmpDir = System.getProperty("test.tmp.dir"); + private static final int DEFAULT_DATANODE_COUNT = 4; private HiveServer2 hiveServer2 = null; private final File baseDir; private final Path baseFsDir; @@ -104,6 +105,7 @@ private boolean isMetastoreSecure; private String metastoreServerPrincipal; private String metastoreServerKeyTab; + private int dataNodes = DEFAULT_DATANODE_COUNT; // default number of datanodes for miniHS2 public Builder() { } @@ -162,6 +164,16 @@ public Builder cleanupLocalDirOnStartup(boolean val) { return this; } + /** + * Set the number of datanodes to be used by HS2. + * @param count the number of datanodes + * @return this Builder + */ + public Builder withDataNodes(int count) { + this.dataNodes = count; + return this; + } + public MiniHS2 build() throws Exception { if (miniClusterType == MiniClusterType.MR && useMiniKdc) { throw new IOException("Can't create secure miniMr ... yet"); @@ -173,7 +185,7 @@ public MiniHS2 build() throws Exception { } return new MiniHS2(hiveConf, miniClusterType, useMiniKdc, serverPrincipal, serverKeytab, isMetastoreRemote, usePortsFromConf, authType, isHA, cleanupLocalDirOnStartup, - isMetastoreSecure, metastoreServerPrincipal, metastoreServerKeyTab); + isMetastoreSecure, metastoreServerPrincipal, metastoreServerKeyTab, dataNodes); } } @@ -212,9 +224,8 @@ public boolean isUseMiniKdc() { private MiniHS2(HiveConf hiveConf, MiniClusterType miniClusterType, boolean useMiniKdc, String serverPrincipal, String serverKeytab, boolean isMetastoreRemote, boolean usePortsFromConf, String authType, boolean isHA, boolean cleanupLocalDirOnStartup, - boolean isMetastoreSecure, - String metastoreServerPrincipal, - String metastoreKeyTab) throws Exception { + boolean isMetastoreSecure, String metastoreServerPrincipal, String metastoreKeyTab, + int dataNodes) throws Exception { // Always use localhost for hostname as some tests like SSL CN validation ones // are tied to localhost being present in the certificate name super( @@ -242,7 +253,7 @@ private MiniHS2(HiveConf hiveConf, MiniClusterType miniClusterType, boolean useM if (miniClusterType != MiniClusterType.LOCALFS_ONLY) { // Initialize dfs - dfs = ShimLoader.getHadoopShims().getMiniDfs(hiveConf, 4, true, null, isHA); + dfs = ShimLoader.getHadoopShims().getMiniDfs(hiveConf, dataNodes, true, null, isHA); fs = dfs.getFileSystem(); String uriString = fs.getUri().toString(); @@ -334,7 +345,7 @@ public MiniHS2(HiveConf hiveConf, MiniClusterType clusterType, boolean usePortsF throws Exception { this(hiveConf, clusterType, false, null, null, false, usePortsFromConf, "KERBEROS", false, true, - false, null, null); + false, null, null, DEFAULT_DATANODE_COUNT); } public void start(Map confOverlay) throws Exception { diff --git 
ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 8e32b02b59c4e36e0dd610beb6aacf80c3ac555d..563061ae61eaabde37255d265c7d507ddb8a2872 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -60,16 +60,12 @@ import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FsShell; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidTxnWriteIdList; -import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; @@ -293,39 +289,7 @@ import org.slf4j.LoggerFactory; import org.stringtemplate.v4.ST; -import java.io.BufferedWriter; -import java.io.DataOutputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Serializable; -import java.io.Writer; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.charset.StandardCharsets; -import java.sql.SQLException; -import java.util.AbstractList; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.concurrent.ExecutionException; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - import static org.apache.commons.lang.StringUtils.join; -import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE; /** * DDLTask implementation. @@ -2713,7 +2677,7 @@ else if (sortCol.getOrder() == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_DESC) { String tbl_location = " '" + HiveStringUtils.escapeHiveCommand(sd.getLocation()) + "'"; // Table properties - duplicateProps.addAll(Arrays.asList(StatsSetupConst.TABLE_PARAMS_STATS_KEYS)); + duplicateProps.addAll(StatsSetupConst.TABLE_PARAMS_STATS_KEYS); String tbl_properties = propertiesToString(tbl.getParameters(), duplicateProps); createTab_stmt.add(TEMPORARY, tbl_temp); @@ -3679,7 +3643,7 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException, Map tblProps = tbl.getParameters() == null ? 
new HashMap() : tbl.getParameters(); Map valueMap = new HashMap<>(); Map stateMap = new HashMap<>(); - for (String stat : StatsSetupConst.supportedStats) { + for (String stat : StatsSetupConst.SUPPORTED_STATS) { valueMap.put(stat, 0L); stateMap.put(stat, true); } @@ -3688,7 +3652,7 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException, for (Partition partition : parts) { Map props = partition.getParameters(); Boolean state = StatsSetupConst.areBasicStatsUptoDate(props); - for (String stat : StatsSetupConst.supportedStats) { + for (String stat : StatsSetupConst.SUPPORTED_STATS) { stateMap.put(stat, stateMap.get(stat) && state); if (props != null && props.get(stat) != null) { valueMap.put(stat, valueMap.get(stat) + Long.parseLong(props.get(stat))); @@ -3696,7 +3660,7 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException, } numParts++; } - for (String stat : StatsSetupConst.supportedStats) { + for (String stat : StatsSetupConst.SUPPORTED_STATS) { StatsSetupConst.setBasicStatsState(tblProps, Boolean.toString(stateMap.get(stat))); tblProps.put(stat, valueMap.get(stat).toString()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java index 58c8960c096f8885086be4f46dc1e33edd26249a..5d382ae6f3b4fca5c34d0358dafa8762c81e4fa5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java @@ -593,7 +593,7 @@ private boolean needToUpdateStats(Map props, EnvironmentContext e return false; } boolean statsPresent = false; - for (String stat : StatsSetupConst.supportedStats) { + for (String stat : StatsSetupConst.SUPPORTED_STATS) { String statVal = props.get(stat); if (statVal != null && Long.parseLong(statVal) > 0) { statsPresent = true; diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java index 44687ef471f76bb6c8baee3c9081a191e2d0e74d..dd08261938da02b0091f05aa6220fd7d0c79edb1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.metadata.formatting; import org.apache.commons.lang.StringEscapeUtils; +import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -474,10 +475,16 @@ private static void displayAllParameters(Map params, StringBuild List keys = new ArrayList(params.keySet()); Collections.sort(keys); for (String key : keys) { + String value = params.get(key); + if (key.equals(StatsSetupConst.NUM_ERASURE_CODED_FILES)) { + if ("0".equals(value)) { + continue; + } + } tableInfo.append(FIELD_DELIM); // Ensures all params are indented. formatOutput(key, - escapeUnicode ? StringEscapeUtils.escapeJava(params.get(key)) - : HiveStringUtils.escapeJava(params.get(key)), + escapeUnicode ? 
StringEscapeUtils.escapeJava(value) + : HiveStringUtils.escapeJava(value), tableInfo, isOutputPadded); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java index 326cbedcf0194bfa42b66557fc88f6285df1c619..705365b74c3f9dff4f9fdf01e77027fa9d158101 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java @@ -366,6 +366,7 @@ public void showTableStatus(DataOutputStream outStream, public long lastAccessTime = 0; public long lastUpdateTime = 0; public int numOfFiles = 0; + int numOfErasureCodedFiles = 0; } // TODO: why is this in text formatter?!! @@ -416,6 +417,12 @@ private void writeFileSystemStats(DataOutputStream outStream, outStream.write((unknown ? unknownString : "" + fd.numOfFiles).getBytes("UTF-8")); outStream.write(terminator); + if (fd.numOfErasureCodedFiles > 0) { + outStream.write("totalNumberErasureCodedFiles:".getBytes("UTF-8")); + outStream.write((unknown ? unknownString : "" + fd.numOfErasureCodedFiles).getBytes("UTF-8")); + outStream.write(terminator); + } + for (int k = 0; k < indent; k++) { outStream.write(Utilities.INDENT.getBytes("UTF-8")); } @@ -473,6 +480,9 @@ private void processDir(FileStatus status, FileSystem fs, FileData fd) throws IO continue; } fd.numOfFiles++; + if (currentStatus.isErasureCoded()) { + fd.numOfErasureCodedFiles++; + } long fileLen = currentStatus.getLen(); fd.totalFileSize += fileLen; if (fileLen > fd.maxFileSize) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java index 8e75db9e08d575eb8ba7123251eaca9e2097a7af..689c88895a6529b2c221c361106e07521a1fc8cb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java @@ -216,7 +216,7 @@ private int convertJoinBucketMapJoin(JoinOperator joinOp, MapJoinOperator mapJoi LOG.debug("Found a big table branch with parent operator {} and position {}", parentOp, pos); bigTablePosition = pos; bigTableFound = true; - bigInputStat = new Statistics(0, Long.MAX_VALUE); + bigInputStat = new Statistics(0, Long.MAX_VALUE, 0); } else { // Either we've found multiple big table branches, or the current branch cannot // be a big table branch. Disable mapjoin for these cases. 
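Note on the Statistics constructor: the call site above in SparkMapJoinOptimizer and the one below in StatsRulesProcFactory both move to the widened constructor, which carries the erasure-coded file count alongside row count and data size; the Statistics.java hunk later in this diff defines it. The following is a minimal usage sketch under that assumption only -- the class name StatisticsEcSketch and the literal values are illustrative and are not part of the patch.

    // Illustrative sketch only: exercises the three-argument constructor added in this patch.
    import org.apache.hadoop.hive.ql.plan.Statistics;

    public class StatisticsEcSketch {                        // hypothetical class name
      public static void main(String[] args) {
        long numRows = 2000L;                                // illustrative values
        long dataSize = 21248L;
        long numErasureCodedFiles = 4L;                      // count of EC files backing the scan
        Statistics stats = new Statistics(numRows, dataSize, numErasureCodedFiles);
        // toString() appends " Erasure files: <n>" only when the count is positive,
        // so plans over non-EC data print exactly as before.
        System.out.println(stats);
      }
    }
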
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index d0be33bd0fd83c829584f069a12e36b278e4d6b2..3c2b0854269d5426153958096a8b5b5ad3612c0f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -1818,7 +1818,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } } - Statistics wcStats = new Statistics(newNumRows, newDataSize); + Statistics wcStats = new Statistics(newNumRows, newDataSize, 0); wcStats.setBasicStatsState(statsState); // evaluate filter expression and update statistics diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java index 61458b4e256f7bf63f781a3135059257a2b8ddd4..821e428eca07d1b70e0e31287dd973f97d28121f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java @@ -45,10 +45,8 @@ import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.OutputFormat; import org.apache.hive.common.util.ReflectionUtil; -import org.apache.hive.common.util.HiveStringUtils; import org.apache.hadoop.hive.ql.plan.Explain.Level; - /** * PartitionDesc. * @@ -221,7 +219,7 @@ public Properties getProperties() { @Explain(displayName = "properties", explainLevels = { Level.EXTENDED }) public Map getPropertiesExplain() { - return HiveStringUtils.getPropertiesExplain(getProperties()); + return PlanUtils.getPropertiesExplain(getProperties()); } public void setProperties(final Properties properties) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java index 2c5b6557ce6462151e764c17064354f448ee708d..250a0850848dee9eba8efa00fdb36985d3342503 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java @@ -23,6 +23,7 @@ import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -31,6 +32,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.JavaUtils; +import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.llap.LlapOutputFormat; @@ -78,6 +80,7 @@ import org.apache.hadoop.mapred.TextInputFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hive.common.util.HiveStringUtils.quoteComments; /** * PlanUtils. @@ -1203,4 +1206,32 @@ public static ReadEntity getParentViewInfo(String alias_id, public static Class getDefaultSerDe() { return LazySimpleSerDe.class; } + + /** + * Get a Map of table or partition properties to be used in explain extended output. + * Do some filtering to make output readable and/or concise. 
+ */ + static Map getPropertiesExplain(Properties properties) { + if (properties != null) { + Map clone = null; + String value = properties.getProperty("columns.comments"); + if (value != null) { + // should copy properties first + clone = new HashMap<>(properties); + clone.put("columns.comments", quoteComments(value)); + } + value = properties.getProperty(StatsSetupConst.NUM_ERASURE_CODED_FILES); + if ("0".equals(value)) { + // should copy properties first + if (clone == null) { + clone = new HashMap<>(properties); + } + clone.remove(StatsSetupConst.NUM_ERASURE_CODED_FILES); + } + if (clone != null) { + return clone; + } + } + return properties; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java index fd461ae52930de54d993f0df74c0635f82fcc799..574df2e7d346aa4f99d80c968e4a6914a5792043 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java @@ -46,18 +46,20 @@ boolean morePreciseThan(State other) { private long numRows; private long runTimeNumRows; private long dataSize; + private long numErasureCodedFiles; private State basicStatsState; private Map columnStats; private State columnStatsState; private boolean runtimeStats; public Statistics() { - this(0, 0); + this(0, 0, 0); } - public Statistics(long nr, long ds) { + public Statistics(long nr, long ds, long numEcFiles) { numRows = nr; dataSize = ds; + numErasureCodedFiles = numEcFiles; runTimeNumRows = -1; columnStats = null; columnStatsState = State.NONE; @@ -130,6 +132,10 @@ public String toString() { } sb.append(" Data size: "); sb.append(dataSize); + if (numErasureCodedFiles > 0) { + sb.append(" Erasure files: "); + sb.append(numErasureCodedFiles); + } sb.append(" Basic stats: "); sb.append(basicStatsState); sb.append(" Column stats: "); @@ -178,7 +184,7 @@ public String extendedToString() { @Override public Statistics clone() { - Statistics clone = new Statistics(numRows, dataSize); + Statistics clone = new Statistics(numRows, dataSize, numErasureCodedFiles); clone.setRunTimeNumRows(runTimeNumRows); clone.setBasicStatsState(basicStatsState); clone.setColumnStatsState(columnStatsState); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java index 4068e5670fe8a65593228d2e3df9d809e836a696..bbce940c2ed093d0aa86b04248e2ce94f651bcc3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java @@ -29,7 +29,6 @@ import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.OutputFormat; -import org.apache.hive.common.util.HiveStringUtils; import org.apache.hive.common.util.ReflectionUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -127,7 +126,7 @@ public Properties getProperties() { @Explain(displayName = "properties", explainLevels = { Level.EXTENDED }) public Map getPropertiesExplain() { - return HiveStringUtils.getPropertiesExplain(getProperties()); + return PlanUtils.getPropertiesExplain(getProperties()); } public void setProperties(final Properties properties) { diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java index d4d46a3671efdaaed32f63b7262b963cce00b94e..3128ee8200bf5383118dd05a5fa1d16224bbe4d0 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java @@ -163,6 +163,7 @@ public void run() { long rawDataSize = 0; long fileSize = 0; long numFiles = 0; + long numErasureCodedFiles = 0; // Note: this code would be invalid for transactional tables of any kind. Utilities.FILE_OP_LOGGER.debug("Aggregating stats for {}", dir); List fileList = null; @@ -190,6 +191,9 @@ public void run() { numRows += statsRR.getStats().getRowCount(); fileSize += file.getLen(); numFiles += 1; + if (file.isErasureCoded()) { + numErasureCodedFiles++; + } } else { throw new HiveException(String.format("Unexpected file found during reading footers for: %s ", file)); } @@ -206,6 +210,7 @@ public void run() { parameters.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(rawDataSize)); parameters.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(fileSize)); parameters.put(StatsSetupConst.NUM_FILES, String.valueOf(numFiles)); + parameters.put(StatsSetupConst.NUM_ERASURE_CODED_FILES, String.valueOf(numErasureCodedFiles)); if (partish.getPartition() != null) { result = new Partition(partish.getTable(), partish.getPartition().getTPartition()); @@ -224,7 +229,7 @@ public void run() { private String toString(Map parameters) { StringBuilder builder = new StringBuilder(); - for (String statType : StatsSetupConst.supportedStats) { + for (String statType : StatsSetupConst.SUPPORTED_STATS) { String value = parameters.get(statType); if (value != null) { if (builder.length() > 0) { diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java index 8c238871765b0d5312a459a0e7f68c81f3837c13..7f7778d4e50034d50fe122894cabf28cf55ec952 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java @@ -207,7 +207,7 @@ private String getAggregationPrefix0(Table table, Partition partition) throws Me private void updateStats(StatsAggregator statsAggregator, Map parameters, String aggKey, boolean isFullAcid) throws HiveException { - for (String statType : StatsSetupConst.statsRequireCompute) { + for (String statType : StatsSetupConst.STATS_REQUIRE_COMPUTE) { if (isFullAcid && !work.isTargetRewritten()) { // Don't bother with aggregation in this case, it will probably be invalid. parameters.remove(statType); @@ -417,7 +417,7 @@ private StatsCollectionContext getContext() throws HiveException { private String toString(Map parameters) { StringBuilder builder = new StringBuilder(); - for (String statType : StatsSetupConst.supportedStats) { + for (String statType : StatsSetupConst.SUPPORTED_STATS) { String value = parameters.get(statType); if (value != null) { if (builder.length() > 0) { diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index e00098529f78aa4950c8cec301b966999ae9bf96..24718ff71ae53b3e7363a5df8e912aaf92d8f786 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -339,6 +339,7 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p // we would like to avoid file system calls if it too expensive long ds = shouldEstimateStats? 
getDataSize(conf, table): getRawDataSize(table); long nr = getNumRows(conf, schema, table, ds); + long numErasureCodedFiles = getErasureCodedFiles(table); List colStats = Lists.newArrayList(); if (fetchColStats) { colStats = getTableColumnStats(table, schema, neededColumns, colStatsCache); @@ -352,7 +353,7 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p long betterDS = getDataSizeFromColumnStats(nr, colStats); ds = (betterDS < 1 || colStats.isEmpty()) ? ds : betterDS; } - stats = new Statistics(nr, ds); + stats = new Statistics(nr, ds, numErasureCodedFiles); // infer if any column can be primary key based on column statistics inferAndSetPrimaryKey(stats.getNumRows(), colStats); @@ -363,15 +364,20 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p // the partitions that are not required long nr = 0; long ds = 0; + long numErasureCodedFiles = 0; - List rowCounts = Lists.newArrayList(); - List dataSizes = Lists.newArrayList(); + List rowCounts; + List dataSizes; + List erasureCodedFiles; rowCounts = getBasicStatForPartitions(table, partList.getNotDeniedPartns(), StatsSetupConst.ROW_COUNT); dataSizes = getBasicStatForPartitions(table, partList.getNotDeniedPartns(), StatsSetupConst.RAW_DATA_SIZE); + erasureCodedFiles = getBasicStatForPartitions(table, partList.getNotDeniedPartns(), + StatsSetupConst.NUM_ERASURE_CODED_FILES); nr = getSumIgnoreNegatives(rowCounts); ds = getSumIgnoreNegatives(dataSizes); + numErasureCodedFiles = getSumIgnoreNegatives(erasureCodedFiles); if (ds <= 0) { dataSizes = getBasicStatForPartitions(table, partList.getNotDeniedPartns(), StatsSetupConst.TOTAL_SIZE); dataSizes = safeMult(dataSizes, deserFactor); @@ -402,7 +408,7 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p if (nr == 0) { nr = 1; } - stats = new Statistics(nr, ds); + stats = new Statistics(nr, ds, numErasureCodedFiles); // if at least a partition does not contain row count then mark basic stats state as PARTIAL if (containsNonPositives(rowCounts) && @@ -1763,6 +1769,14 @@ public static long getTotalSize(Table table) { return getBasicStatForTable(table, StatsSetupConst.TOTAL_SIZE); } + /** + * Get number of Erasure Coded files for a table + * @return count of EC files + */ + public static long getErasureCodedFiles(Table table) { + return getBasicStatForTable(table, StatsSetupConst.NUM_ERASURE_CODED_FILES); + } + /** * Get basic stats of table * @param table @@ -1889,7 +1903,7 @@ private static String getFullyQualifiedName(String... names) { } /** - * Get qualified column name from output key column names + * Get qualified column name from output key column names. * @param keyExprs * - output key names * @return list of qualified names diff --git ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java index 60447192b11261727a63219fb2e69f09fd425aa0..611f85a8ceea87281bea760c03816d707d43bba2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java +++ ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java @@ -21,8 +21,6 @@ import java.lang.reflect.Modifier; import java.util.HashSet; -import com.google.common.collect.Lists; - import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; @@ -469,7 +467,7 @@ private String buildMmCompactionCtQuery( HiveStringUtils.escapeHiveCommand(location)).append("' TBLPROPERTIES ("); // Exclude all standard table properties. 
Set excludes = getHiveMetastoreConstants(); - excludes.addAll(Lists.newArrayList(StatsSetupConst.TABLE_PARAMS_STATS_KEYS)); + excludes.addAll(StatsSetupConst.TABLE_PARAMS_STATS_KEYS); isFirst = true; for (Map.Entry e : t.getParameters().entrySet()) { if (e.getValue() == null) continue; diff --git ql/src/test/queries/clientpositive/erasure_explain.q ql/src/test/queries/clientpositive/erasure_explain.q new file mode 100644 index 0000000000000000000000000000000000000000..e2954d4a8a1e88afd1f2e2047b7c15a4aa976672 --- /dev/null +++ ql/src/test/queries/clientpositive/erasure_explain.q @@ -0,0 +1,24 @@ +--! qt:dataset:src +--! qt:dataset:srcpart +-- Test explain diagnostics with Erasure Coding + +ERASURE echo listPolicies originally was; +ERASURE listPolicies; + +show table extended like srcpart; + +desc formatted srcpart; + +explain select key, value from srcpart; + +explain extended select key, value from srcpart; + +show table extended like src; + +desc formatted src; + +explain select key, value from src; + +explain extended select key, value from src; + + diff --git ql/src/test/queries/clientpositive/erasure_simple.q ql/src/test/queries/clientpositive/erasure_simple.q index c08409c17787417b986d90a43104f5ddd456e600..cc886c2d300f7306f70c23df866e236e0818a7f0 100644 --- ql/src/test/queries/clientpositive/erasure_simple.q +++ ql/src/test/queries/clientpositive/erasure_simple.q @@ -5,6 +5,7 @@ ERASURE echo listPolicies originally was; ERASURE listPolicies; ERASURE enablePolicy --policy RS-10-4-1024k; +ERASURE enablePolicy --policy XOR-2-1-1024k; ERASURE echo listPolicies after enablePolicy; ERASURE listPolicies; @@ -25,8 +26,20 @@ ERASURE getPolicy --path hdfs:///tmp/erasure_coding1; create table erasure_table (a int) location 'hdfs:///tmp/erasure_coding1/location1'; +-- insert some data with the default policy (RS-3-2-1024k) from the fs root insert into erasure_table values(4); + +-- set a new policy on the directory and insert some data +ERASURE setPolicy --path hdfs:///tmp/erasure_coding1 --policy XOR-2-1-1024k; +insert into erasure_table values(5); + +ERASURE echo policy on older file is; +ERASURE getPolicy --path hdfs:///tmp/erasure_coding1/location1/000000_0; +ERASURE echo policy on newer file is; +ERASURE getPolicy --path hdfs:///tmp/erasure_coding1/location1/000000_0_copy_1; + +-- show that data is present select * from erasure_table; drop table if exists erasure_table2; diff --git ql/src/test/results/clientpositive/erasurecoding/erasure_explain.q.out ql/src/test/results/clientpositive/erasurecoding/erasure_explain.q.out new file mode 100644 index 0000000000000000000000000000000000000000..8ada9b6c9e0f20afe17708c85756fd2fdbe80b38 --- /dev/null +++ ql/src/test/results/clientpositive/erasurecoding/erasure_explain.q.out @@ -0,0 +1,409 @@ +ECHO listPolicies originally was +Policy: RS-10-4-1024k DISABLED +Policy: RS-3-2-1024k ENABLED +Policy: RS-6-3-1024k ENABLED +Policy: RS-LEGACY-6-3-1024k DISABLED +Policy: XOR-2-1-1024k DISABLED +PREHOOK: query: show table extended like srcpart +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like srcpart +POSTHOOK: type: SHOW_TABLESTATUS +tableName:srcpart +#### A masked pattern was here #### +location:hdfs://### HDFS PATH ### +inputformat:org.apache.hadoop.mapred.TextInputFormat +outputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +columns:struct columns { string key, string value} +partitioned:true +partitionColumns:struct partition_columns { string ds, string hr} +totalNumberFiles:4 +totalNumberErasureCodedFiles:4 
+totalFileSize:23248 +maxFileSize:5812 +minFileSize:5812 +#### A masked pattern was here #### + +PREHOOK: query: desc formatted srcpart +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@srcpart +POSTHOOK: query: desc formatted srcpart +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@srcpart +# col_name data_type comment +key string default +value string default + +# Partition Information +# col_name data_type comment +ds string +hr string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + bucketing_version 2 + numFiles 4 + numFilesErasureCoded 4 + numPartitions 4 + numRows 2000 + rawDataSize 21248 + totalSize 23248 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain select key, value from srcpart +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value from srcpart +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Erasure files: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 21248 Erasure files: 4 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: explain extended select key, value from srcpart +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select key, value from srcpart +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + numFiles 1 + numFilesErasureCoded 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + 
serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + numFiles 1 + numFilesErasureCoded 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + numFiles 1 + numFilesErasureCoded 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + numFiles 1 + numFilesErasureCoded 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Processor Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Erasure files: 4 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 21248 Erasure files: 4 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: show table extended like src +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like src +POSTHOOK: type: SHOW_TABLESTATUS +tableName:src +#### A masked pattern was here #### +location:hdfs://### HDFS PATH ### +inputformat:org.apache.hadoop.mapred.TextInputFormat +outputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +columns:struct columns { string key, string value} +partitioned:false +partitionColumns: +totalNumberFiles:1 +totalNumberErasureCodedFiles:1 +totalFileSize:5812 +maxFileSize:5812 +minFileSize:5812 +#### A masked pattern was here #### + +PREHOOK: query: desc formatted src +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src +POSTHOOK: query: desc formatted src +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src +# col_name data_type comment +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} + bucketing_version 2 + numFiles 1 + numFilesErasureCoded 1 + numRows 500 + rawDataSize 5312 + 
totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain select key, value from src +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Erasure files: 1 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Erasure files: 1 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: explain extended select key, value from src +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select key, value from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Erasure files: 1 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Erasure files: 1 Basic stats: COMPLETE Column stats: NONE + ListSink + diff --git ql/src/test/results/clientpositive/erasurecoding/erasure_simple.q.out ql/src/test/results/clientpositive/erasurecoding/erasure_simple.q.out index 01f6015a346c1e4283fd6a8cf1eaa3b670450e20..b44cb7de8dd8907f7cd436e803eddaa71ea1b818 100644 --- ql/src/test/results/clientpositive/erasurecoding/erasure_simple.q.out +++ ql/src/test/results/clientpositive/erasurecoding/erasure_simple.q.out @@ -5,12 +5,13 @@ Policy: RS-6-3-1024k ENABLED Policy: RS-LEGACY-6-3-1024k DISABLED Policy: XOR-2-1-1024k DISABLED Enabled EC policy 'RS-10-4-1024k' +Enabled EC policy 'XOR-2-1-1024k' ECHO listPolicies after enablePolicy Policy: RS-10-4-1024k ENABLED Policy: RS-3-2-1024k ENABLED Policy: RS-6-3-1024k ENABLED Policy: RS-LEGACY-6-3-1024k DISABLED -Policy: XOR-2-1-1024k DISABLED +Policy: XOR-2-1-1024k ENABLED ECHO original policy on erasure_coding1 EC policy is 'RS-3-2-1024k' ECHO set the default policy on erasure_coding1 @@ -39,6 +40,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@erasure_table POSTHOOK: Lineage: erasure_table.a SCRIPT [] +Set EC policy' XOR-2-1-1024k +PREHOOK: query: insert into erasure_table values(5) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@erasure_table +POSTHOOK: query: insert into erasure_table values(5) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@erasure_table +POSTHOOK: Lineage: erasure_table.a SCRIPT [] +ECHO policy on older file is +EC policy is 'RS-3-2-1024k' +ECHO policy on newer file is +EC policy is 'XOR-2-1-1024k' PREHOOK: query: select * from erasure_table PREHOOK: type: QUERY PREHOOK: Input: default@erasure_table @@ -48,6 +63,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@erasure_table POSTHOOK: Output: hdfs://### HDFS PATH ### 4 +5 PREHOOK: query: drop table 
if exists erasure_table2 PREHOOK: type: DROPTABLE POSTHOOK: query: drop table if exists erasure_table2 @@ -88,6 +104,7 @@ columns:struct columns { string key, string value} partitioned:false partitionColumns: totalNumberFiles:1 +totalNumberErasureCodedFiles:1 totalFileSize:5812 maxFileSize:5812 minFileSize:5812 @@ -100,6 +117,7 @@ POSTHOOK: query: SHOW TBLPROPERTIES erasure_table2 POSTHOOK: type: SHOW_TBLPROPERTIES COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} numFiles 1 +numFilesErasureCoded 1 numRows 500 rawDataSize 5312 totalSize 5812 diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java index 78ea01d9687fe043d63441430c46b30c25cd9756..a7ca05ae0f890c94b7879109a6689fcfef82b5ae 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -22,6 +22,7 @@ import java.util.Map; import java.util.TreeMap; +import com.google.common.collect.ImmutableList; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; @@ -104,6 +105,11 @@ public String getAggregator(Configuration conf) { */ public static final String RAW_DATA_SIZE = "rawDataSize"; + /** + * The name of the statistic for Number of Erasure Coded Files - to be published or gathered. + */ + public static final String NUM_ERASURE_CODED_FILES = "numFilesErasureCoded"; + /** * Temp dir for writing stats from tasks. */ @@ -113,18 +119,20 @@ public String getAggregator(Configuration conf) { /** * List of all supported statistics */ - public static final String[] supportedStats = {NUM_FILES,ROW_COUNT,TOTAL_SIZE,RAW_DATA_SIZE}; + public static final List SUPPORTED_STATS = ImmutableList.of( + NUM_FILES, ROW_COUNT, TOTAL_SIZE, RAW_DATA_SIZE, NUM_ERASURE_CODED_FILES); /** * List of all statistics that need to be collected during query execution. These are * statistics that inherently require a scan of the data. */ - public static final String[] statsRequireCompute = new String[] {ROW_COUNT,RAW_DATA_SIZE}; + public static final List STATS_REQUIRE_COMPUTE = ImmutableList.of(ROW_COUNT, RAW_DATA_SIZE); /** * List of statistics that can be collected quickly without requiring a scan of the data. */ - public static final String[] fastStats = new String[] {NUM_FILES,TOTAL_SIZE}; + public static final List FAST_STATS = ImmutableList.of( + NUM_FILES, TOTAL_SIZE, NUM_ERASURE_CODED_FILES); // This string constant is used to indicate to AlterHandler that // alterPartition/alterTable is happening via statsTask or via user. @@ -154,8 +162,9 @@ public String getAggregator(Configuration conf) { public static final String FALSE = "false"; // The parameter keys for the table statistics. Those keys are excluded from 'show create table' command output. 
-  public static final String[] TABLE_PARAMS_STATS_KEYS = new String[] {
-      COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE,ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS};
+  public static final List TABLE_PARAMS_STATS_KEYS = ImmutableList.of(
+      COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE, ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS,
+      NUM_ERASURE_CODED_FILES);
 
   private static class ColumnStatsAccurate {
     private static ObjectReader objectReader;
@@ -299,7 +308,7 @@ public static void removeColumnStatsState(Map params, List
       params, List cols, String setting) {
     if (TRUE.equals(setting)) {
-      for (String stat : StatsSetupConst.supportedStats) {
+      for (String stat : StatsSetupConst.SUPPORTED_STATS) {
         params.put(stat, "0");
       }
     }
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index e88f9a5fee4b2cbd99ec7c5c5350f8c2b8015384..45e0c1ce0a096fb9529e38486e40b0f1763aceff 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -18,7 +18,6 @@
 package org.apache.hadoop.hive.metastore;
 
 import static org.apache.commons.lang.StringUtils.join;
-import static org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLICATION;
 import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_DATABASE_COMMENT;
 import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_DATABASE_NAME;
 import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_CATALOG_NAME;
@@ -2676,7 +2675,7 @@ private void updateStatsForTruncate(Map props, EnvironmentContext
     if (null == props) {
       return;
     }
-    for (String stat : StatsSetupConst.supportedStats) {
+    for (String stat : StatsSetupConst.SUPPORTED_STATS) {
       String statVal = props.get(stat);
       if (statVal != null) {
         //In the case of truncate table, we set the stats to be 0.
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
index 16a8c758010c1f81e07296362157bb24260bcf3f..22ad872b798fd4deb20867b5167f69bcc25d84ac 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
@@ -95,7 +95,6 @@
 import java.util.Map.Entry;
 import java.util.SortedMap;
 import java.util.SortedSet;
-import java.util.StringJoiner;
 import java.util.TreeMap;
 import java.util.TreeSet;
 import java.util.concurrent.Callable;
@@ -620,7 +619,7 @@ public static boolean isView(Table table) {
    * @return True if the passed Parameters Map contains values for all "Fast Stats".
    */
   private static boolean containsAllFastStats(Map partParams) {
-    for (String stat : StatsSetupConst.fastStats) {
+    for (String stat : StatsSetupConst.FAST_STATS) {
       if (!partParams.containsKey(stat)) {
         return false;
       }
@@ -631,7 +630,7 @@ private static boolean containsAllFastStats(Map partParams) {
   public static boolean isFastStatsSame(Partition oldPart, Partition newPart) {
     // requires to calculate stats if new and old have different fast stats
     if ((oldPart != null) && (oldPart.getParameters() != null)) {
-      for (String stat : StatsSetupConst.fastStats) {
+      for (String stat : StatsSetupConst.FAST_STATS) {
         if (oldPart.getParameters().containsKey(stat)) {
           Long oldStat = Long.parseLong(oldPart.getParameters().get(stat));
           Long newStat = Long.parseLong(newPart.getParameters().get(stat));
@@ -712,20 +711,26 @@ public static void populateQuickStats(List fileStatus, Map params) {
     params.remove(StatsSetupConst.NUM_FILES);
     params.remove(StatsSetupConst.TOTAL_SIZE);
+    params.remove(StatsSetupConst.NUM_ERASURE_CODED_FILES);
   }
diff --git standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java
index 55ff1502d415dea52095cfdd523d01f1e49ce084..d5ae5d1c0d4658335af92e4472bd3985d9f9493f 100644
--- standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java
+++ standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java
@@ -21,6 +21,7 @@
 import com.google.common.collect.ImmutableMap;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest;
@@ -43,6 +44,7 @@
 import static org.apache.hadoop.hive.common.StatsSetupConst.COLUMN_STATS_ACCURATE;
 import static org.apache.hadoop.hive.common.StatsSetupConst.NUM_FILES;
+import static org.apache.hadoop.hive.common.StatsSetupConst.NUM_ERASURE_CODED_FILES;
 import static org.apache.hadoop.hive.common.StatsSetupConst.STATS_GENERATED;
 import static org.apache.hadoop.hive.common.StatsSetupConst.TOTAL_SIZE;
 import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.updateTableStatsSlow;
@@ -60,7 +62,11 @@
   private static final String DB_NAME = "db1";
   private static final String TABLE_NAME = "tbl1";
 
-  private final Map paramsWithStats = ImmutableMap.of(NUM_FILES, "1", TOTAL_SIZE, "2");
+  private final Map paramsWithStats = ImmutableMap.of(
+      NUM_FILES, "1",
+      TOTAL_SIZE, "2",
+      NUM_ERASURE_CODED_FILES, "0"
+  );
 
   private Database db;
@@ -120,7 +126,7 @@ public void testcolumnsIncludedByNameType() {
    *   <li>Create database</li>
    *   <li>Create unpartitioned table</li>
    *   <li>Create unpartitioned table which has params</li>
-   *   <li>Call updateTableStatsSlow with arguments which should caue stats calculation</li>
+   *   <li>Call updateTableStatsSlow with arguments which should cause stats calculation</li>
    *   <li>Verify table statistics using mocked warehouse</li>
    *   <li>Create table which already have stats</li>
    *   <li>Call updateTableStatsSlow forcing stats recompute</li>
@@ -141,18 +147,17 @@ public void testUpdateTableStatsSlow_statsUpdated() throws TException {
 
     // Set up mock warehouse
-    FileStatus fs1 = new FileStatus(1, true, 2, 3,
-        4, new Path("/tmp/0"));
-    FileStatus fs2 = new FileStatus(fileLength, false, 3, 4,
-        5, new Path("/tmp/1"));
-    FileStatus fs3 = new FileStatus(fileLength, false, 3, 4,
-        5, new Path("/tmp/1"));
+    FileStatus fs1 = getFileStatus(1, true, 2, 3, 4, "/tmp/0", false);
+    FileStatus fs2 = getFileStatus(fileLength, false, 3, 4, 5, "/tmp/1", true);
+    FileStatus fs3 = getFileStatus(fileLength, false, 3, 4, 5, "/tmp/1", false);
     List fileStatus = Arrays.asList(fs1, fs2, fs3);
     Warehouse wh = mock(Warehouse.class);
     when(wh.getFileStatusesForUnpartitionedTable(db, tbl)).thenReturn(fileStatus);
 
     Map expected = ImmutableMap.of(NUM_FILES, "2",
-        TOTAL_SIZE, String.valueOf(2 * fileLength));
+        TOTAL_SIZE, String.valueOf(2 * fileLength),
+        NUM_ERASURE_CODED_FILES, "1"
+    );
     updateTableStatsSlow(db, tbl, wh, false, false, null);
     assertThat(tbl.getParameters(), is(expected));
@@ -195,6 +200,7 @@ public void testUpdateTableStatsSlow_statsUpdated() throws TException {
     Map expected1 = ImmutableMap.of(NUM_FILES, "2",
         TOTAL_SIZE, String.valueOf(2 * fileLength),
+        NUM_ERASURE_CODED_FILES, "1",
         COLUMN_STATS_ACCURATE, "{\"BASIC_STATS\":\"true\"}");
     assertThat(tbl3.getParameters(), is(expected1));
   }
@@ -227,7 +233,7 @@ public void testUpdateTableStatsSlow_removesDoNotUpdateStats() throws TException
   }
 
   /**
-   * Verify that updateTableStatsSlow() does not calculate tabe statistics when
+   * Verify that updateTableStatsSlow() does not calculate table statistics when
    * <ol>
    * <li>newDir is true</li>
    * <li>Table is partitioned</li>
@@ -270,5 +276,16 @@ public void testUpdateTableStatsSlow_doesNotUpdateStats() throws TException {
     updateTableStatsSlow(db, tbl2, wh, false, false, null);
     verify(wh, never()).getFileStatusesForUnpartitionedTable(db, tbl2);
   }
+
+  /**
+   * Build a FileStatus object.
+   */
+  private static FileStatus getFileStatus(long fileLength, boolean isdir, int blockReplication,
+      int blockSize, int modificationTime, String pathString, boolean isErasureCoded) {
+    return new FileStatus(fileLength, isdir, blockReplication, blockSize, modificationTime,
+        0L, (FsPermission)null, (String)null, (String)null, null,
+        new Path(pathString), false, false, isErasureCoded);
+  }
+
 }
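Note on the quick-stats computation: the MetaStoreUtils hunk above only shows the params.remove(...) additions, not the code that actually counts erasure-coded files. The sketch below is a minimal illustration (not part of the patch) of how the three quick stats published by this change - numFiles, totalSize and numFilesErasureCoded - can be derived from a directory listing. The class and method names are invented for the example; only the StatsSetupConst keys come from the patch, and FileStatus.isErasureCoded() is the standard Hadoop 3 API.

import java.util.List;
import java.util.Map;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hive.common.StatsSetupConst;

// Hypothetical helper, for illustration only.
public class QuickStatsSketch {
  /** Derive the quick stats from a listing of a table or partition directory. */
  public static void addQuickStats(List<FileStatus> files, Map<String, String> params) {
    long numFiles = 0;
    long totalSize = 0;
    long numErasureCodedFiles = 0;
    for (FileStatus status : files) {
      if (status.isDirectory()) {
        continue; // directories do not contribute to the file count or total size
      }
      numFiles++;
      totalSize += status.getLen();
      if (status.isErasureCoded()) { // erasure-coding flag carried by FileStatus
        numErasureCodedFiles++;
      }
    }
    params.put(StatsSetupConst.NUM_FILES, String.valueOf(numFiles));
    params.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(totalSize));
    params.put(StatsSetupConst.NUM_ERASURE_CODED_FILES, String.valueOf(numErasureCodedFiles));
  }
}

Fed the three FileStatus objects built by getFileStatus() in the test above (one directory, one erasure-coded file and one replicated file of fileLength bytes each), this produces numFiles=2, totalSize=2*fileLength and numFilesErasureCoded=1, matching the expected map in testUpdateTableStatsSlow_statsUpdated(). Because NUM_ERASURE_CODED_FILES is included in FAST_STATS, isFastStatsSame() now compares the erasure-coded file count between old and new partitions as well.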