commit 8a6523662d32a1109ec637b051aeb31dba9a6365
Author: Andrew Sherman
Date:   Tue Jun 5 17:22:00 2018 -0700

    HIVE-18118: provide supportability for Erasure Coding

    Update the number of Erasure Coded files in a directory as part of Basic (aka Quick) Stats.
    This information is then (mostly) available through 'EXPLAIN EXTENDED' and 'DESCRIBE EXTENDED'.
    Extend the MiniHS2 Builder to allow configuring the number of datanodes.
    Add a JDBC MiniHS2/Spark test that uses Erasure Coding.
    There are some changes to StatsSetupConst to make checkstyle happy.

diff --git common/src/java/org/apache/hive/common/util/HiveStringUtils.java common/src/java/org/apache/hive/common/util/HiveStringUtils.java
index cfe9b2208a60586a05d293f222aa90b37e9a06ac..6b14ad95494173467f0496055385cdd8178c1277 100644
--- common/src/java/org/apache/hive/common/util/HiveStringUtils.java
+++ common/src/java/org/apache/hive/common/util/HiveStringUtils.java
@@ -31,19 +31,15 @@
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Date;
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
-import java.util.Map;
-import java.util.HashMap;
 import java.util.Locale;
-import java.util.Properties;
+import java.util.Map;
 import java.util.StringTokenizer;
 import java.util.regex.Pattern;

 import com.google.common.base.Splitter;
-import com.google.common.collect.Interner;
-import com.google.common.collect.Interners;
-
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
 import org.apache.commons.lang3.text.translate.EntityArrays;
@@ -1062,19 +1058,6 @@ public static String normalizeIdentifier(String identifier) {
     return identifier.trim().toLowerCase();
   }

-  public static Map getPropertiesExplain(Properties properties) {
-    if (properties != null) {
-      String value = properties.getProperty("columns.comments");
-      if (value != null) {
-        // should copy properties first
-        Map clone = new HashMap(properties);
-        clone.put("columns.comments", quoteComments(value));
-        return clone;
-      }
-    }
-    return properties;
-  }
-
   public static String quoteComments(String value) {
     char[] chars = value.toCharArray();
     if (!commentProvided(chars)) {
diff --git itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
index d7d7097336fc6be4c2f7a35cd6897e0375486e81..7ef2ced7b1f107ba21fbbb646a434018aeb02758 100644
--- itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
+++ itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
@@ -1658,7 +1658,7 @@ public void testDescribe() throws Exception {
   /**
    * Get Detailed Table Information via jdbc
    */
-  private String getDetailedTableDescription(Statement stmt, String table) throws SQLException {
+  static String getDetailedTableDescription(Statement stmt, String table) throws SQLException {
     String extendedDescription = null;
     try (ResultSet rs = stmt.executeQuery("describe extended " + table)) {
       while (rs.next()) {
diff --git itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2ErasureCoding.java itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2ErasureCoding.java
new file mode 100644
index 0000000000000000000000000000000000000000..b0a0145a4ee705b0a7d8f214d2c87397f731faec
--- /dev/null
+++ itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2ErasureCoding.java
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.jdbc;
+
+import java.io.IOException;
+import java.nio.file.Paths;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.Collections;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.shims.HadoopShims;
+import org.apache.hadoop.hive.shims.HadoopShims.HdfsErasureCodingShim;
+import org.apache.hadoop.hive.shims.HadoopShims.MiniDFSShim;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.apache.hive.jdbc.miniHS2.MiniHS2;
+import static org.apache.hadoop.hive.ql.QTestUtil.DEFAULT_TEST_EC_POLICY;
+import static org.apache.hive.jdbc.TestJdbcWithMiniHS2.getDetailedTableDescription;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Run Erasure Coding tests with JDBC.
+ */
+public class TestJdbcWithMiniHS2ErasureCoding {
+  private static final String DB_NAME = "ecTestDb";
+  private static MiniHS2 miniHS2 = null;
+  private static HiveConf conf;
+  private Connection hs2Conn = null;
+
+  private static HiveConf createHiveOnSparkConf() {
+    HiveConf hiveConf = new HiveConf();
+    // Tell DFS not to consider load when choosing a datanode, as this can cause failures:
+    // in a test we do not have spare datanode capacity.
+    hiveConf.setBoolean("dfs.namenode.redundancy.considerLoad", false);
+    hiveConf.set("hive.execution.engine", "spark");
+    hiveConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
+    hiveConf.set("spark.master", "local-cluster[2,2,1024]");
+    hiveConf.set("hive.spark.client.connect.timeout", "30000ms");
+    hiveConf.set("spark.local.dir",
+        Paths.get(System.getProperty("test.tmp.dir"), "TestJdbcWithMiniHS2ErasureCoding-local-dir")
+            .toString());
+    hiveConf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, false); // avoid ZK errors
+    return hiveConf;
+  }
+
+  /**
+   * Set up a mini HS2 with miniMR.
+   */
+  @BeforeClass
+  public static void beforeTest() throws Exception {
+    Class.forName(MiniHS2.getJdbcDriverName());
+    conf = createHiveOnSparkConf();
+    DriverManager.setLoginTimeout(0);
+    miniHS2 = new MiniHS2.Builder()
+        .withConf(conf)
+        .withMiniMR()
+        .withDataNodes(5) // sufficient for RS-3-2-1024k
+        .build();
+    miniHS2.start(Collections.emptyMap());
+    createDb();
+    MiniDFSShim dfs = miniHS2.getDfs();
+    addErasurePolicy(dfs, "hdfs:///", DEFAULT_TEST_EC_POLICY);
+  }
+
+  /**
+   * Shut down the mini HS2.
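+   * The guard below makes this safe to run even when beforeTest() failed before HS2 started.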
+   */
+  @AfterClass
+  public static void afterTest() {
+    if (miniHS2 != null && miniHS2.isStarted()) {
+      miniHS2.stop();
+    }
+  }
+
+  /**
+   * Set up a connection to the test database before each test.
+   */
+  @Before
+  public void setUp() throws Exception {
+    hs2Conn = DriverManager.getConnection(miniHS2.getJdbcURL(DB_NAME),
+        System.getProperty("user.name"), "bar");
+  }
+
+  /**
+   * Close the connection after each test.
+   */
+  @After
+  public void tearDown() throws Exception {
+    if (hs2Conn != null) {
+      hs2Conn.close();
+    }
+  }
+
+  /**
+   * Create a database.
+   */
+  private static void createDb() throws Exception {
+    try (Connection conn = DriverManager.getConnection(miniHS2.getJdbcURL(),
+        System.getProperty("user.name"), "bar");
+        Statement stmt2 = conn.createStatement()) {
+      stmt2.execute("DROP DATABASE IF EXISTS " + DB_NAME + " CASCADE");
+      stmt2.execute("CREATE DATABASE " + DB_NAME);
+    }
+  }
+
+  /**
+   * Test EXPLAIN on a fs with Erasure Coding.
+   */
+  @Test
+  public void testExplainErasureCoding() throws Exception {
+    try (Statement stmt = hs2Conn.createStatement()) {
+      String tableName = "pTableEc";
+      stmt.execute(
+          " CREATE TABLE " + tableName + " (userid VARCHAR(64), link STRING, source STRING) "
+              + "PARTITIONED BY (datestamp STRING, i int) "
+              + "CLUSTERED BY (userid) INTO 4 BUCKETS STORED AS PARQUET");
+      // insert data to create 2 partitions
+      stmt.execute("INSERT INTO TABLE " + tableName
+          + " PARTITION (datestamp = '2014-09-23', i = 1)(userid,link) VALUES ('jsmith', 'mail.com')");
+      stmt.execute("INSERT INTO TABLE " + tableName
+          + " PARTITION (datestamp = '2014-09-24', i = 2)(userid,link) VALUES ('mac', 'superchunk.com')");
+      String explain = getExtendedExplain(stmt, "select userid from " + tableName);
+      assertMatchAndCount(explain, " numFiles 4", 2);
+      assertMatchAndCount(explain, " numFilesErasureCoded 4", 2);
+    }
+  }
+
+  /**
+   * Test DESCRIBE on a fs with Erasure Coding.
+   */
+  @Test
+  public void testDescribeErasureCoding() throws Exception {
+    try (Statement stmt = hs2Conn.createStatement()) {
+      String table = "pageviews";
+      stmt.execute(" CREATE TABLE " + table + " (userid VARCHAR(64), link STRING, source STRING) "
+          + "PARTITIONED BY (datestamp STRING, i int) CLUSTERED BY (userid) INTO 4 BUCKETS STORED AS PARQUET");
+      stmt.execute("INSERT INTO TABLE " + table + " PARTITION (datestamp = '2014-09-23', i = 1)"
+          + "(userid,link) VALUES ('jsmith', 'mail.com')");
+      stmt.execute("INSERT INTO TABLE " + table + " PARTITION (datestamp = '2014-09-24', i = 1)"
+          + "(userid,link) VALUES ('dpatel', 'gmail.com')");
+      String description = getDetailedTableDescription(stmt, table);
+      assertMatchAndCount(description, "numFiles=8", 1);
+      assertMatchAndCount(description, "numFilesErasureCoded=8", 1);
+      assertMatchAndCount(description, "numPartitions=2", 1);
+    }
+  }
+
+  /**
+   * Add an Erasure Coding Policy to a Path.
+   */
+  private static void addErasurePolicy(MiniDFSShim dfs, String pathString, String policyName) throws IOException {
+    HadoopShims hadoopShims = ShimLoader.getHadoopShims();
+    HdfsErasureCodingShim erasureCodingShim = hadoopShims.createHdfsErasureCodingShim(dfs.getFileSystem(), conf);
+    erasureCodingShim.enableErasureCodingPolicy(policyName);
+    Path fsRoot = new Path(pathString);
+    erasureCodingShim.setErasureCodingPolicy(fsRoot, policyName);
+    HadoopShims.HdfsFileErasureCodingPolicy erasureCodingPolicy =
+        erasureCodingShim.getErasureCodingPolicy(fsRoot);
+    assertEquals(policyName, erasureCodingPolicy.getName());
+  }
+
+  /**
+   * Get Extended Explain output via JDBC.
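+   * @param stmt an open Statement on the test connection
+   * @param query the query text, to which 'explain extended' is prepended
+   * @return the explain output rows joined by newlines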
+   */
+  private static String getExtendedExplain(Statement stmt, String query) throws SQLException {
+    StringBuilder sb = new StringBuilder(2048);
+    try (ResultSet rs = stmt.executeQuery("explain extended " + query)) {
+      while (rs.next()) {
+        sb.append(rs.getString(1)).append('\n');
+      }
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Check that the expected string occurs correctly in the output string.
+   * @param output string to probe
+   * @param expectedString string to find in output
+   * @param expectedCount the expected number of occurrences of the expected string
+   */
+  private void assertMatchAndCount(String output, String expectedString, int expectedCount) {
+    assertTrue("Did not find expected '" + expectedString + "' in text "
+        + output, output.contains(expectedString));
+    assertEquals("wrong count of matches of '" + expectedString + "' in text "
+        + output, expectedCount, StringUtils.countMatches(output, expectedString));
+  }
+}
diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 385b71e33f6f570c581a0999f04c733d386ff611..072ac4b5c317af468cec5e3b828022296b868221 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -1711,4 +1711,5 @@ druid.kafka.query.files=druidkafkamini_basic.q
 erasurecoding.shared.query.files=erasure_commands.q

 # tests to be run only by TestErasureCodingHDFSCliDriver
-erasurecoding.only.query.files=erasure_simple.q
+erasurecoding.only.query.files=erasure_simple.q,\
+  erasure_explain.q
diff --git itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
index 2dfd2aad9d78d48fbcc37708376e0996f73c26b6..98aae5c118a1cc5b1aa2de9df226455d9558abd9 100644
--- itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
+++ itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
@@ -150,9 +150,10 @@ public static final String TEST_HIVE_USER_PROPERTY = "test.hive.user";

   /**
-   * The Erasure Coding Policy to use in TestErasureCodingHDFSCliDriver.
+   * The default Erasure Coding Policy to use in Erasure Coding tests.
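+   * (RS-3-2-1024k stripes each file across 3 data and 2 parity blocks, so tests that use it need at least 5 datanodes.)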
*/ - private static final String DEFAULT_TEST_EC_POLICY = "RS-3-2-1024k"; + public static final String DEFAULT_TEST_EC_POLICY = "RS-3-2-1024k"; private String testWarehouse; @Deprecated diff --git itests/util/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java itests/util/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java index 1700c08d3f37285de43b5d4fe5c77ef55c170235..a78dd739b13482a8bb0bdd779ca06f70990972cf 100644 --- itests/util/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java +++ itests/util/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java @@ -66,6 +66,7 @@ private static final FsPermission FULL_PERM = new FsPermission((short)00777); private static final FsPermission WRITE_ALL_PERM = new FsPermission((short)00733); private static final String tmpDir = System.getProperty("test.tmp.dir"); + private static final int DEFAULT_DATANODE_COUNT = 4; private HiveServer2 hiveServer2 = null; private final File baseDir; private final Path baseFsDir; @@ -104,6 +105,7 @@ private boolean isMetastoreSecure; private String metastoreServerPrincipal; private String metastoreServerKeyTab; + private int dataNodes = DEFAULT_DATANODE_COUNT; // default number of datanodes for miniHS2 public Builder() { } @@ -162,6 +164,16 @@ public Builder cleanupLocalDirOnStartup(boolean val) { return this; } + /** + * Set the number of datanodes to be used by HS2. + * @param count the number of datanodes + * @return this Builder + */ + public Builder withDataNodes(int count) { + this.dataNodes = count; + return this; + } + public MiniHS2 build() throws Exception { if (miniClusterType == MiniClusterType.MR && useMiniKdc) { throw new IOException("Can't create secure miniMr ... yet"); @@ -173,7 +185,7 @@ public MiniHS2 build() throws Exception { } return new MiniHS2(hiveConf, miniClusterType, useMiniKdc, serverPrincipal, serverKeytab, isMetastoreRemote, usePortsFromConf, authType, isHA, cleanupLocalDirOnStartup, - isMetastoreSecure, metastoreServerPrincipal, metastoreServerKeyTab); + isMetastoreSecure, metastoreServerPrincipal, metastoreServerKeyTab, dataNodes); } } @@ -212,9 +224,8 @@ public boolean isUseMiniKdc() { private MiniHS2(HiveConf hiveConf, MiniClusterType miniClusterType, boolean useMiniKdc, String serverPrincipal, String serverKeytab, boolean isMetastoreRemote, boolean usePortsFromConf, String authType, boolean isHA, boolean cleanupLocalDirOnStartup, - boolean isMetastoreSecure, - String metastoreServerPrincipal, - String metastoreKeyTab) throws Exception { + boolean isMetastoreSecure, String metastoreServerPrincipal, String metastoreKeyTab, + int dataNodes) throws Exception { // Always use localhost for hostname as some tests like SSL CN validation ones // are tied to localhost being present in the certificate name super( @@ -242,7 +253,7 @@ private MiniHS2(HiveConf hiveConf, MiniClusterType miniClusterType, boolean useM if (miniClusterType != MiniClusterType.LOCALFS_ONLY) { // Initialize dfs - dfs = ShimLoader.getHadoopShims().getMiniDfs(hiveConf, 4, true, null, isHA); + dfs = ShimLoader.getHadoopShims().getMiniDfs(hiveConf, dataNodes, true, null, isHA); fs = dfs.getFileSystem(); String uriString = fs.getUri().toString(); @@ -334,7 +345,7 @@ public MiniHS2(HiveConf hiveConf, MiniClusterType clusterType, boolean usePortsF throws Exception { this(hiveConf, clusterType, false, null, null, false, usePortsFromConf, "KERBEROS", false, true, - false, null, null); + false, null, null, DEFAULT_DATANODE_COUNT); } public void start(Map confOverlay) throws Exception { diff --git 
ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index b7babd623d7828b449da91a0e024f140ef7e994a..ba0070dd68757e87ba3bb8eb7f1f58c603581613 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -60,16 +60,12 @@ import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FsShell; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidTxnWriteIdList; -import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; @@ -293,39 +289,7 @@ import org.slf4j.LoggerFactory; import org.stringtemplate.v4.ST; -import java.io.BufferedWriter; -import java.io.DataOutputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Serializable; -import java.io.Writer; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.charset.StandardCharsets; -import java.sql.SQLException; -import java.util.AbstractList; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.concurrent.ExecutionException; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - import static org.apache.commons.lang.StringUtils.join; -import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE; /** * DDLTask implementation. @@ -2713,7 +2677,7 @@ else if (sortCol.getOrder() == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_DESC) { String tbl_location = " '" + HiveStringUtils.escapeHiveCommand(sd.getLocation()) + "'"; // Table properties - duplicateProps.addAll(Arrays.asList(StatsSetupConst.TABLE_PARAMS_STATS_KEYS)); + duplicateProps.addAll(StatsSetupConst.TABLE_PARAMS_STATS_KEYS); String tbl_properties = propertiesToString(tbl.getParameters(), duplicateProps); createTab_stmt.add(TEMPORARY, tbl_temp); @@ -3679,7 +3643,7 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException, Map tblProps = tbl.getParameters() == null ? 
new HashMap() : tbl.getParameters(); Map valueMap = new HashMap<>(); Map stateMap = new HashMap<>(); - for (String stat : StatsSetupConst.supportedStats) { + for (String stat : StatsSetupConst.SUPPORTED_STATS) { valueMap.put(stat, 0L); stateMap.put(stat, true); } @@ -3688,7 +3652,7 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException, for (Partition partition : parts) { Map props = partition.getParameters(); Boolean state = StatsSetupConst.areBasicStatsUptoDate(props); - for (String stat : StatsSetupConst.supportedStats) { + for (String stat : StatsSetupConst.SUPPORTED_STATS) { stateMap.put(stat, stateMap.get(stat) && state); if (props != null && props.get(stat) != null) { valueMap.put(stat, valueMap.get(stat) + Long.parseLong(props.get(stat))); @@ -3696,7 +3660,7 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException, } numParts++; } - for (String stat : StatsSetupConst.supportedStats) { + for (String stat : StatsSetupConst.SUPPORTED_STATS) { StatsSetupConst.setBasicStatsState(tblProps, Boolean.toString(stateMap.get(stat))); tblProps.put(stat, valueMap.get(stat).toString()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java index 58c8960c096f8885086be4f46dc1e33edd26249a..5d382ae6f3b4fca5c34d0358dafa8762c81e4fa5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java @@ -593,7 +593,7 @@ private boolean needToUpdateStats(Map props, EnvironmentContext e return false; } boolean statsPresent = false; - for (String stat : StatsSetupConst.supportedStats) { + for (String stat : StatsSetupConst.SUPPORTED_STATS) { String statVal = props.get(stat); if (statVal != null && Long.parseLong(statVal) > 0) { statsPresent = true; diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java index 7af6dabdf5ece139a5bcd342b02852c21aab5bfd..36cd46aa43f23f482744e7d98a71dd9905d319b0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.metadata.formatting; import org.apache.commons.lang.StringEscapeUtils; +import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -474,10 +475,16 @@ private static void displayAllParameters(Map params, StringBuild List keys = new ArrayList(params.keySet()); Collections.sort(keys); for (String key : keys) { + String value = params.get(key); + if (key.equals(StatsSetupConst.NUM_ERASURE_CODED_FILES)) { + if ("0".equals(value)) { + continue; + } + } tableInfo.append(FIELD_DELIM); // Ensures all params are indented. formatOutput(key, - escapeUnicode ? StringEscapeUtils.escapeJava(params.get(key)) - : HiveStringUtils.escapeJava(params.get(key)), + escapeUnicode ? 
StringEscapeUtils.escapeJava(value) + : HiveStringUtils.escapeJava(value), tableInfo, isOutputPadded); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java index 326cbedcf0194bfa42b66557fc88f6285df1c619..705365b74c3f9dff4f9fdf01e77027fa9d158101 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java @@ -366,6 +366,7 @@ public void showTableStatus(DataOutputStream outStream, public long lastAccessTime = 0; public long lastUpdateTime = 0; public int numOfFiles = 0; + int numOfErasureCodedFiles = 0; } // TODO: why is this in text formatter?!! @@ -416,6 +417,12 @@ private void writeFileSystemStats(DataOutputStream outStream, outStream.write((unknown ? unknownString : "" + fd.numOfFiles).getBytes("UTF-8")); outStream.write(terminator); + if (fd.numOfErasureCodedFiles > 0) { + outStream.write("totalNumberErasureCodedFiles:".getBytes("UTF-8")); + outStream.write((unknown ? unknownString : "" + fd.numOfErasureCodedFiles).getBytes("UTF-8")); + outStream.write(terminator); + } + for (int k = 0; k < indent; k++) { outStream.write(Utilities.INDENT.getBytes("UTF-8")); } @@ -473,6 +480,9 @@ private void processDir(FileStatus status, FileSystem fs, FileData fd) throws IO continue; } fd.numOfFiles++; + if (currentStatus.isErasureCoded()) { + fd.numOfErasureCodedFiles++; + } long fileLen = currentStatus.getLen(); fd.totalFileSize += fileLen; if (fileLen > fd.maxFileSize) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java index 8e75db9e08d575eb8ba7123251eaca9e2097a7af..689c88895a6529b2c221c361106e07521a1fc8cb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java @@ -216,7 +216,7 @@ private int convertJoinBucketMapJoin(JoinOperator joinOp, MapJoinOperator mapJoi LOG.debug("Found a big table branch with parent operator {} and position {}", parentOp, pos); bigTablePosition = pos; bigTableFound = true; - bigInputStat = new Statistics(0, Long.MAX_VALUE); + bigInputStat = new Statistics(0, Long.MAX_VALUE, 0); } else { // Either we've found multiple big table branches, or the current branch cannot // be a big table branch. Disable mapjoin for these cases. 
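Reviewer note on the constructor change above: Statistics now carries the erasure-coded file count as a third constructor argument, so call sites that fabricate stats (as in this mapjoin hunk) pass a literal 0. A minimal sketch of how a real table's count reaches a Statistics object, assuming only names this patch adds (StatsUtils.getErasureCodedFiles and the three-argument constructor); the local variable names and surrounding StatsUtils wiring are illustrative:

    long numRows = basicStats.getNumRows();                 // existing basic stat
    long dataSize = basicStats.getDataSize();               // existing basic stat
    long ecFiles = StatsUtils.getErasureCodedFiles(table);  // new helper in this patch
    Statistics stats = new Statistics(numRows, dataSize, ecFiles);
    // When ecFiles > 0, Statistics.toString() now appends " Erasure files: <n>",
    // which is the annotation visible in the EXPLAIN plan output further below.
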
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index d0be33bd0fd83c829584f069a12e36b278e4d6b2..3c2b0854269d5426153958096a8b5b5ad3612c0f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -1818,7 +1818,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } } - Statistics wcStats = new Statistics(newNumRows, newDataSize); + Statistics wcStats = new Statistics(newNumRows, newDataSize, 0); wcStats.setBasicStatsState(statsState); // evaluate filter expression and update statistics diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java index 61458b4e256f7bf63f781a3135059257a2b8ddd4..821e428eca07d1b70e0e31287dd973f97d28121f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java @@ -45,10 +45,8 @@ import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.OutputFormat; import org.apache.hive.common.util.ReflectionUtil; -import org.apache.hive.common.util.HiveStringUtils; import org.apache.hadoop.hive.ql.plan.Explain.Level; - /** * PartitionDesc. * @@ -221,7 +219,7 @@ public Properties getProperties() { @Explain(displayName = "properties", explainLevels = { Level.EXTENDED }) public Map getPropertiesExplain() { - return HiveStringUtils.getPropertiesExplain(getProperties()); + return PlanUtils.getPropertiesExplain(getProperties()); } public void setProperties(final Properties properties) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java index 2c5b6557ce6462151e764c17064354f448ee708d..250a0850848dee9eba8efa00fdb36985d3342503 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java @@ -23,6 +23,7 @@ import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -31,6 +32,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.JavaUtils; +import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.llap.LlapOutputFormat; @@ -78,6 +80,7 @@ import org.apache.hadoop.mapred.TextInputFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hive.common.util.HiveStringUtils.quoteComments; /** * PlanUtils. @@ -1203,4 +1206,32 @@ public static ReadEntity getParentViewInfo(String alias_id, public static Class getDefaultSerDe() { return LazySimpleSerDe.class; } + + /** + * Get a Map of table or partition properties to be used in explain extended output. + * Do some filtering to make output readable and/or concise. 
+ */ + static Map getPropertiesExplain(Properties properties) { + if (properties != null) { + Map clone = null; + String value = properties.getProperty("columns.comments"); + if (value != null) { + // should copy properties first + clone = new HashMap<>(properties); + clone.put("columns.comments", quoteComments(value)); + } + value = properties.getProperty(StatsSetupConst.NUM_ERASURE_CODED_FILES); + if ("0".equals(value)) { + // should copy properties first + if (clone == null) { + clone = new HashMap<>(properties); + } + clone.remove(StatsSetupConst.NUM_ERASURE_CODED_FILES); + } + if (clone != null) { + return clone; + } + } + return properties; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java index 6babe497b254a66418c27572303468ad80e0a55e..bc5f9d943e7f5d24969af0ab5fb898d1d22ae8cd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java @@ -53,18 +53,20 @@ public State merge(State otherState) { private long numRows; private long runTimeNumRows; private long dataSize; + private long numErasureCodedFiles; private State basicStatsState; private Map columnStats; private State columnStatsState; private boolean runtimeStats; public Statistics() { - this(0, 0); + this(0, 0, 0); } - public Statistics(long nr, long ds) { + public Statistics(long nr, long ds, long numEcFiles) { numRows = nr; dataSize = ds; + numErasureCodedFiles = numEcFiles; runTimeNumRows = -1; columnStats = null; columnStatsState = State.NONE; @@ -137,6 +139,10 @@ public String toString() { } sb.append(" Data size: "); sb.append(dataSize); + if (numErasureCodedFiles > 0) { + sb.append(" Erasure files: "); + sb.append(numErasureCodedFiles); + } sb.append(" Basic stats: "); sb.append(basicStatsState); sb.append(" Column stats: "); @@ -185,7 +191,7 @@ public String extendedToString() { @Override public Statistics clone() { - Statistics clone = new Statistics(numRows, dataSize); + Statistics clone = new Statistics(numRows, dataSize, numErasureCodedFiles); clone.setRunTimeNumRows(runTimeNumRows); clone.setBasicStatsState(basicStatsState); clone.setColumnStatsState(columnStatsState); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java index 4068e5670fe8a65593228d2e3df9d809e836a696..bbce940c2ed093d0aa86b04248e2ce94f651bcc3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java @@ -29,7 +29,6 @@ import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.OutputFormat; -import org.apache.hive.common.util.HiveStringUtils; import org.apache.hive.common.util.ReflectionUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -127,7 +126,7 @@ public Properties getProperties() { @Explain(displayName = "properties", explainLevels = { Level.EXTENDED }) public Map getPropertiesExplain() { - return HiveStringUtils.getPropertiesExplain(getProperties()); + return PlanUtils.getPropertiesExplain(getProperties()); } public void setProperties(final Properties properties) { diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java index d4d46a3671efdaaed32f63b7262b963cce00b94e..3128ee8200bf5383118dd05a5fa1d16224bbe4d0 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java @@ -163,6 +163,7 @@ public void run() { long rawDataSize = 0; long fileSize = 0; long numFiles = 0; + long numErasureCodedFiles = 0; // Note: this code would be invalid for transactional tables of any kind. Utilities.FILE_OP_LOGGER.debug("Aggregating stats for {}", dir); List fileList = null; @@ -190,6 +191,9 @@ public void run() { numRows += statsRR.getStats().getRowCount(); fileSize += file.getLen(); numFiles += 1; + if (file.isErasureCoded()) { + numErasureCodedFiles++; + } } else { throw new HiveException(String.format("Unexpected file found during reading footers for: %s ", file)); } @@ -206,6 +210,7 @@ public void run() { parameters.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(rawDataSize)); parameters.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(fileSize)); parameters.put(StatsSetupConst.NUM_FILES, String.valueOf(numFiles)); + parameters.put(StatsSetupConst.NUM_ERASURE_CODED_FILES, String.valueOf(numErasureCodedFiles)); if (partish.getPartition() != null) { result = new Partition(partish.getTable(), partish.getPartition().getTPartition()); @@ -224,7 +229,7 @@ public void run() { private String toString(Map parameters) { StringBuilder builder = new StringBuilder(); - for (String statType : StatsSetupConst.supportedStats) { + for (String statType : StatsSetupConst.SUPPORTED_STATS) { String value = parameters.get(statType); if (value != null) { if (builder.length() > 0) { diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java index f31c170a3081f2d47052aa8390bc69be06ac2cf9..0db90b06bd3078b3082454ab8b08fee3585a3a64 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java @@ -207,7 +207,7 @@ private String getAggregationPrefix0(Table table, Partition partition) throws Me private void updateStats(StatsAggregator statsAggregator, Map parameters, String aggKey) throws HiveException { - for (String statType : StatsSetupConst.statsRequireCompute) { + for (String statType : StatsSetupConst.STATS_REQUIRE_COMPUTE) { String value = statsAggregator.aggregateStats(aggKey, statType); if (value != null && !value.isEmpty()) { long longValue = Long.parseLong(value); @@ -411,7 +411,7 @@ private StatsCollectionContext getContext() throws HiveException { private String toString(Map parameters) { StringBuilder builder = new StringBuilder(); - for (String statType : StatsSetupConst.supportedStats) { + for (String statType : StatsSetupConst.SUPPORTED_STATS) { String value = parameters.get(statType); if (value != null) { if (builder.length() > 0) { diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 494939a7994ea35ed25aea8648a6afb2ead3a248..95a4440a60bfe2dfa0dacf78125b7211235a60b7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -261,6 +261,8 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p long nr = basicStats.getNumRows(); List colStats = Lists.newArrayList(); + long numErasureCodedFiles = getErasureCodedFiles(table); + if (fetchColStats) { colStats = getTableColumnStats(table, schema, neededColumns, colStatsCache); if(colStats == null) { @@ -273,7 +275,7 @@ private static Statistics 
collectStatistics(HiveConf conf, PrunedPartitionList p long betterDS = getDataSizeFromColumnStats(nr, colStats); ds = (betterDS < 1 || colStats.isEmpty()) ? ds : betterDS; } - stats = new Statistics(nr, ds); + stats = new Statistics(nr, ds, numErasureCodedFiles); // infer if any column can be primary key based on column statistics inferAndSetPrimaryKey(stats.getNumRows(), colStats); @@ -308,10 +310,14 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p long nr = bbs.getNumRows(); long ds = bbs.getDataSize(); + List erasureCodedFiles = getBasicStatForPartitions(table, partList.getNotDeniedPartns(), + StatsSetupConst.NUM_ERASURE_CODED_FILES); + long numErasureCodedFiles = getSumIgnoreNegatives(erasureCodedFiles); + if (nr == 0) { nr=1; } - stats = new Statistics(nr, ds); + stats = new Statistics(nr, ds, numErasureCodedFiles); stats.setBasicStatsState(bbs.getState()); if (nr > 0) { // FIXME: this promotion process should be removed later @@ -1655,6 +1661,14 @@ public static long getTotalSize(Table table) { return getBasicStatForTable(table, StatsSetupConst.TOTAL_SIZE); } + /** + * Get number of Erasure Coded files for a table + * @return count of EC files + */ + public static long getErasureCodedFiles(Table table) { + return getBasicStatForTable(table, StatsSetupConst.NUM_ERASURE_CODED_FILES); + } + /** * Get basic stats of table * @param table @@ -1782,7 +1796,7 @@ private static String getFullyQualifiedName(String... names) { } /** - * Get qualified column name from output key column names + * Get qualified column name from output key column names. * @param keyExprs * - output key names * @return list of qualified names diff --git ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java index 60447192b11261727a63219fb2e69f09fd425aa0..611f85a8ceea87281bea760c03816d707d43bba2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java +++ ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java @@ -21,8 +21,6 @@ import java.lang.reflect.Modifier; import java.util.HashSet; -import com.google.common.collect.Lists; - import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; @@ -469,7 +467,7 @@ private String buildMmCompactionCtQuery( HiveStringUtils.escapeHiveCommand(location)).append("' TBLPROPERTIES ("); // Exclude all standard table properties. Set excludes = getHiveMetastoreConstants(); - excludes.addAll(Lists.newArrayList(StatsSetupConst.TABLE_PARAMS_STATS_KEYS)); + excludes.addAll(StatsSetupConst.TABLE_PARAMS_STATS_KEYS); isFirst = true; for (Map.Entry e : t.getParameters().entrySet()) { if (e.getValue() == null) continue; diff --git ql/src/test/queries/clientpositive/erasure_explain.q ql/src/test/queries/clientpositive/erasure_explain.q new file mode 100644 index 0000000000000000000000000000000000000000..e2954d4a8a1e88afd1f2e2047b7c15a4aa976672 --- /dev/null +++ ql/src/test/queries/clientpositive/erasure_explain.q @@ -0,0 +1,24 @@ +--! qt:dataset:src +--! 
qt:dataset:srcpart +-- Test explain diagnostics with Erasure Coding + +ERASURE echo listPolicies originally was; +ERASURE listPolicies; + +show table extended like srcpart; + +desc formatted srcpart; + +explain select key, value from srcpart; + +explain extended select key, value from srcpart; + +show table extended like src; + +desc formatted src; + +explain select key, value from src; + +explain extended select key, value from src; + + diff --git ql/src/test/queries/clientpositive/erasure_simple.q ql/src/test/queries/clientpositive/erasure_simple.q index c08409c17787417b986d90a43104f5ddd456e600..cc886c2d300f7306f70c23df866e236e0818a7f0 100644 --- ql/src/test/queries/clientpositive/erasure_simple.q +++ ql/src/test/queries/clientpositive/erasure_simple.q @@ -5,6 +5,7 @@ ERASURE echo listPolicies originally was; ERASURE listPolicies; ERASURE enablePolicy --policy RS-10-4-1024k; +ERASURE enablePolicy --policy XOR-2-1-1024k; ERASURE echo listPolicies after enablePolicy; ERASURE listPolicies; @@ -25,8 +26,20 @@ ERASURE getPolicy --path hdfs:///tmp/erasure_coding1; create table erasure_table (a int) location 'hdfs:///tmp/erasure_coding1/location1'; +-- insert some data with the default policy (RS-3-2-1024k) from the fs root insert into erasure_table values(4); + +-- set a new policy on the directory and insert some data +ERASURE setPolicy --path hdfs:///tmp/erasure_coding1 --policy XOR-2-1-1024k; +insert into erasure_table values(5); + +ERASURE echo policy on older file is; +ERASURE getPolicy --path hdfs:///tmp/erasure_coding1/location1/000000_0; +ERASURE echo policy on newer file is; +ERASURE getPolicy --path hdfs:///tmp/erasure_coding1/location1/000000_0_copy_1; + +-- show that data is present select * from erasure_table; drop table if exists erasure_table2; diff --git ql/src/test/results/clientnegative/unset_table_property.q.out ql/src/test/results/clientnegative/unset_table_property.q.out index eb308ebc1764f53774874544b2f150b025fdc2a7..20378a1ce01739723065e0604bee46b4859895e0 100644 --- ql/src/test/results/clientnegative/unset_table_property.q.out +++ ql/src/test/results/clientnegative/unset_table_property.q.out @@ -24,6 +24,7 @@ bucketing_version 2 c 3 #### A masked pattern was here #### numFiles 0 +numFilesErasureCoded 0 numRows 0 rawDataSize 0 totalSize 0 diff --git ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out index ff02643c10cdb3f69d89b0290d06a77b9191d76e..4db5d70960ae8191c271d540dc8a954b931106c0 100644 --- ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out +++ ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out @@ -67,6 +67,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucketing_version 2 numFiles 1 +numFilesErasureCoded 0 numRows 2 rawDataSize 408 totalSize 457 @@ -100,6 +101,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucketing_version 2 numFiles 1 +numFilesErasureCoded 0 numRows 2 rawDataSize 232 totalSize 326 diff --git ql/src/test/results/clientpositive/druid/druidmini_mv.q.out ql/src/test/results/clientpositive/druid/druidmini_mv.q.out index 2e44e14e8fa06871d8e04e1afbf6575834d1cbcd..383f2dc4582f472abb3a89937e58a3908db698eb 100644 --- ql/src/test/results/clientpositive/druid/druidmini_mv.q.out +++ ql/src/test/results/clientpositive/druid/druidmini_mv.q.out @@ -89,6 +89,7 @@ bucketing_version 2 druid.datasource 
default.cmv_mat_view_n2 druid.segment.granularity HOUR numFiles 0 +numFilesErasureCoded 0 numRows 2 rawDataSize 0 storage_handler org.apache.hadoop.hive.druid.DruidStorageHandler @@ -136,6 +137,7 @@ bucketing_version 2 druid.datasource default.cmv_mat_view2_n0 druid.segment.granularity HOUR numFiles 0 +numFilesErasureCoded 0 numRows 3 rawDataSize 0 storage_handler org.apache.hadoop.hive.druid.DruidStorageHandler @@ -515,6 +517,7 @@ druid.datasource default.cmv_mat_view2_n0 druid.segment.granularity HOUR #### A masked pattern was here #### numFiles 0 +numFilesErasureCoded 0 numRows 3 rawDataSize 0 storage_handler org.apache.hadoop.hive.druid.DruidStorageHandler diff --git ql/src/test/results/clientpositive/erasurecoding/erasure_explain.q.out ql/src/test/results/clientpositive/erasurecoding/erasure_explain.q.out new file mode 100644 index 0000000000000000000000000000000000000000..8ada9b6c9e0f20afe17708c85756fd2fdbe80b38 --- /dev/null +++ ql/src/test/results/clientpositive/erasurecoding/erasure_explain.q.out @@ -0,0 +1,409 @@ +ECHO listPolicies originally was +Policy: RS-10-4-1024k DISABLED +Policy: RS-3-2-1024k ENABLED +Policy: RS-6-3-1024k ENABLED +Policy: RS-LEGACY-6-3-1024k DISABLED +Policy: XOR-2-1-1024k DISABLED +PREHOOK: query: show table extended like srcpart +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like srcpart +POSTHOOK: type: SHOW_TABLESTATUS +tableName:srcpart +#### A masked pattern was here #### +location:hdfs://### HDFS PATH ### +inputformat:org.apache.hadoop.mapred.TextInputFormat +outputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +columns:struct columns { string key, string value} +partitioned:true +partitionColumns:struct partition_columns { string ds, string hr} +totalNumberFiles:4 +totalNumberErasureCodedFiles:4 +totalFileSize:23248 +maxFileSize:5812 +minFileSize:5812 +#### A masked pattern was here #### + +PREHOOK: query: desc formatted srcpart +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@srcpart +POSTHOOK: query: desc formatted srcpart +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@srcpart +# col_name data_type comment +key string default +value string default + +# Partition Information +# col_name data_type comment +ds string +hr string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + bucketing_version 2 + numFiles 4 + numFilesErasureCoded 4 + numPartitions 4 + numRows 2000 + rawDataSize 21248 + totalSize 23248 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain select key, value from srcpart +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value from srcpart +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Erasure files: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 
2000 Data size: 21248 Erasure files: 4 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: explain extended select key, value from srcpart +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select key, value from srcpart +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + numFiles 1 + numFilesErasureCoded 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + numFiles 1 + numFilesErasureCoded 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string 
key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + numFiles 1 + numFilesErasureCoded 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + numFiles 1 + numFilesErasureCoded 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here 
#### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Processor Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Erasure files: 4 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 21248 Erasure files: 4 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: show table extended like src +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like src +POSTHOOK: type: SHOW_TABLESTATUS +tableName:src +#### A masked pattern was here #### +location:hdfs://### HDFS PATH ### +inputformat:org.apache.hadoop.mapred.TextInputFormat +outputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +columns:struct columns { string key, string value} +partitioned:false +partitionColumns: +totalNumberFiles:1 +totalNumberErasureCodedFiles:1 +totalFileSize:5812 +maxFileSize:5812 +minFileSize:5812 +#### A masked pattern was here #### + +PREHOOK: query: desc formatted src +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src +POSTHOOK: query: desc formatted src +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src +# col_name data_type comment +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} + bucketing_version 2 + numFiles 1 + numFilesErasureCoded 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain select key, value from src +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Erasure files: 1 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Erasure files: 1 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: explain extended select key, value from src +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select key, value from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Erasure files: 1 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Erasure files: 1 Basic stats: COMPLETE Column stats: NONE + ListSink + diff --git ql/src/test/results/clientpositive/erasurecoding/erasure_simple.q.out 
ql/src/test/results/clientpositive/erasurecoding/erasure_simple.q.out index 01f6015a346c1e4283fd6a8cf1eaa3b670450e20..b44cb7de8dd8907f7cd436e803eddaa71ea1b818 100644 --- ql/src/test/results/clientpositive/erasurecoding/erasure_simple.q.out +++ ql/src/test/results/clientpositive/erasurecoding/erasure_simple.q.out @@ -5,12 +5,13 @@ Policy: RS-6-3-1024k ENABLED Policy: RS-LEGACY-6-3-1024k DISABLED Policy: XOR-2-1-1024k DISABLED Enabled EC policy 'RS-10-4-1024k' +Enabled EC policy 'XOR-2-1-1024k' ECHO listPolicies after enablePolicy Policy: RS-10-4-1024k ENABLED Policy: RS-3-2-1024k ENABLED Policy: RS-6-3-1024k ENABLED Policy: RS-LEGACY-6-3-1024k DISABLED -Policy: XOR-2-1-1024k DISABLED +Policy: XOR-2-1-1024k ENABLED ECHO original policy on erasure_coding1 EC policy is 'RS-3-2-1024k' ECHO set the default policy on erasure_coding1 @@ -39,6 +40,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@erasure_table POSTHOOK: Lineage: erasure_table.a SCRIPT [] +Set EC policy' XOR-2-1-1024k +PREHOOK: query: insert into erasure_table values(5) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@erasure_table +POSTHOOK: query: insert into erasure_table values(5) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@erasure_table +POSTHOOK: Lineage: erasure_table.a SCRIPT [] +ECHO policy on older file is +EC policy is 'RS-3-2-1024k' +ECHO policy on newer file is +EC policy is 'XOR-2-1-1024k' PREHOOK: query: select * from erasure_table PREHOOK: type: QUERY PREHOOK: Input: default@erasure_table @@ -48,6 +63,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@erasure_table POSTHOOK: Output: hdfs://### HDFS PATH ### 4 +5 PREHOOK: query: drop table if exists erasure_table2 PREHOOK: type: DROPTABLE POSTHOOK: query: drop table if exists erasure_table2 @@ -88,6 +104,7 @@ columns:struct columns { string key, string value} partitioned:false partitionColumns: totalNumberFiles:1 +totalNumberErasureCodedFiles:1 totalFileSize:5812 maxFileSize:5812 minFileSize:5812 @@ -100,6 +117,7 @@ POSTHOOK: query: SHOW TBLPROPERTIES erasure_table2 POSTHOOK: type: SHOW_TBLPROPERTIES COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} numFiles 1 +numFilesErasureCoded 1 numRows 500 rawDataSize 5312 totalSize 5812 diff --git ql/src/test/results/clientpositive/llap/materialized_view_create.q.out ql/src/test/results/clientpositive/llap/materialized_view_create.q.out index 9a70096cc650deac7d28250f1b4aacb7391b5f1c..95f8966bea2b6e3a58f3ee7d068b5d05de7bde9b 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_create.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_create.q.out @@ -245,6 +245,7 @@ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucketing_version 2 key value numFiles 1 +numFilesErasureCoded 0 numRows 5 rawDataSize 1605 totalSize 703 diff --git ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out index c3cd89313b6ec6dd6d05abb74f3dfa21d9daedda..71adebb2acad1545c48d45835cd5876434b141b5 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out @@ -67,6 +67,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucketing_version 2 numFiles 1 +numFilesErasureCoded 0 numRows 2 rawDataSize 
diff --git ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out
index c3cd89313b6ec6dd6d05abb74f3dfa21d9daedda..71adebb2acad1545c48d45835cd5876434b141b5 100644
--- ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out
+++ ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out
@@ -67,6 +67,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES
 COLUMN_STATS_ACCURATE	{"BASIC_STATS":"true"}
 bucketing_version	2
 numFiles	1
+numFilesErasureCoded	0
 numRows	2
 rawDataSize	408
 totalSize	457
@@ -100,6 +101,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES
 COLUMN_STATS_ACCURATE	{"BASIC_STATS":"true"}
 bucketing_version	2
 numFiles	1
+numFilesErasureCoded	0
 numRows	2
 rawDataSize	232
 totalSize	326
diff --git ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out
index e2972f3b89cde255660e380a437daf9f37718dc9..ce1c281bea0bd2a96e67057ee990fa5b45850905 100644
--- ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out
+++ ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out
@@ -67,6 +67,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES
 COLUMN_STATS_ACCURATE	{"BASIC_STATS":"true"}
 bucketing_version	2
 numFiles	1
+numFilesErasureCoded	0
 numRows	2
 rawDataSize	408
 totalSize	457
@@ -100,6 +101,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES
 COLUMN_STATS_ACCURATE	{"BASIC_STATS":"true"}
 bucketing_version	2
 numFiles	1
+numFilesErasureCoded	0
 numRows	2
 rawDataSize	232
 totalSize	326
diff --git ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out
index e5fb23d760d5674e31e18a9ac6b129f23de56e3a..98f74379f6275aed90273929c65144e4bbd51a97 100644
--- ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out
+++ ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out
@@ -91,6 +91,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES
 COLUMN_STATS_ACCURATE	{"BASIC_STATS":"true"}
 bucketing_version	2
 numFiles	1
+numFilesErasureCoded	0
 numRows	2
 rawDataSize	408
 totalSize	457
@@ -124,6 +125,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES
 COLUMN_STATS_ACCURATE	{"BASIC_STATS":"true"}
 bucketing_version	2
 numFiles	1
+numFilesErasureCoded	0
 numRows	2
 rawDataSize	232
 totalSize	326
diff --git ql/src/test/results/clientpositive/llap/materialized_view_describe.q.out ql/src/test/results/clientpositive/llap/materialized_view_describe.q.out
index 85092a03d667d8fc2961dd555971d91027e5d6ee..c68c127a02d4883568ab1767258e32410cd8621b 100644
--- ql/src/test/results/clientpositive/llap/materialized_view_describe.q.out
+++ ql/src/test/results/clientpositive/llap/materialized_view_describe.q.out
@@ -98,6 +98,7 @@ bucketing_version	2
 comment	this is the first view
 key	foo
 numFiles	1
+numFilesErasureCoded	0
 numRows	5
 rawDataSize	580
 totalSize	345
diff --git ql/src/test/results/clientpositive/show_tblproperties.q.out ql/src/test/results/clientpositive/show_tblproperties.q.out
index e4bda1de4ae1724366cfa2b57529ebc1bbdca0d8..83e1ebddc7521be2ef1529cde12219aa0e99b030 100644
--- ql/src/test/results/clientpositive/show_tblproperties.q.out
+++ ql/src/test/results/clientpositive/show_tblproperties.q.out
@@ -41,6 +41,7 @@ bar	bar value
 bucketing_version	2
 #### A masked pattern was here ####
 numFiles	0
+numFilesErasureCoded	0
 numRows	0
 rawDataSize	0
 tmp	true
@@ -60,6 +61,7 @@ bar	bar value
 bucketing_version	2
 #### A masked pattern was here ####
 numFiles	0
+numFilesErasureCoded	0
 numRows	0
 rawDataSize	0
 tmp	true
@@ -115,6 +117,7 @@ bar	bar value
 bucketing_version	2
 #### A masked pattern was here ####
 numFiles	0
+numFilesErasureCoded	0
 numRows	0
 rawDataSize	0
 tmp	true
@@ -134,6 +137,7 @@ bar	bar value1
 bucketing_version	2
 #### A masked pattern was here ####
 numFiles	0
+numFilesErasureCoded	0
 numRows	0
 rawDataSize	0
 tmp	true1
@@ -159,6 +163,7 @@ bar	bar value1
 bucketing_version	2
 #### A masked pattern was here ####
 numFiles	0
+numFilesErasureCoded	0
 numRows	0
 rawDataSize	0
 tmp	true1
diff --git ql/src/test/results/clientpositive/unset_table_view_property.q.out ql/src/test/results/clientpositive/unset_table_view_property.q.out
index 588797160cf4f43668e387cc571f701b6ca22330..5d140d68f94610c6c1381428e342724037354959 100644
--- ql/src/test/results/clientpositive/unset_table_view_property.q.out
+++ ql/src/test/results/clientpositive/unset_table_view_property.q.out
@@ -19,6 +19,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES
 COLUMN_STATS_ACCURATE	{"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true"}}
 bucketing_version	2
 numFiles	0
+numFilesErasureCoded	0
 numRows	0
 rawDataSize	0
 totalSize	0
@@ -41,6 +42,7 @@ bucketing_version	2
 c	3
 #### A masked pattern was here ####
 numFiles	0
+numFilesErasureCoded	0
 numRows	0
 rawDataSize	0
 totalSize	0
@@ -61,6 +63,7 @@ COLUMN_STATS_ACCURATE	{"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2"
 bucketing_version	2
 #### A masked pattern was here ####
 numFiles	0
+numFilesErasureCoded	0
 numRows	0
 rawDataSize	0
 totalSize	0
@@ -84,6 +87,7 @@ c	3
 d	4
 #### A masked pattern was here ####
 numFiles	0
+numFilesErasureCoded	0
 numRows	0
 rawDataSize	0
 totalSize	0
@@ -105,6 +109,7 @@ bucketing_version	2
 c	3
 #### A masked pattern was here ####
 numFiles	0
+numFilesErasureCoded	0
 numRows	0
 rawDataSize	0
 totalSize	0
@@ -125,6 +130,7 @@ COLUMN_STATS_ACCURATE	{"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2"
 bucketing_version	2
 #### A masked pattern was here ####
 numFiles	0
+numFilesErasureCoded	0
 numRows	0
 rawDataSize	0
 totalSize	0
@@ -149,6 +155,7 @@ c	3
 d	4
 #### A masked pattern was here ####
 numFiles	0
+numFilesErasureCoded	0
 numRows	0
 rawDataSize	0
 totalSize	0
@@ -171,6 +178,7 @@ bucketing_version	2
 c	3
 #### A masked pattern was here ####
 numFiles	0
+numFilesErasureCoded	0
 numRows	0
 rawDataSize	0
 totalSize	0
@@ -192,6 +200,7 @@ a	1
 bucketing_version	2
 #### A masked pattern was here ####
 numFiles	0
+numFilesErasureCoded	0
 numRows	0
 rawDataSize	0
 totalSize	0
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
index 78ea01d9687fe043d63441430c46b30c25cd9756..a7ca05ae0f890c94b7879109a6689fcfef82b5ae 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
@@ -22,6 +22,7 @@
 import java.util.Map;
 import java.util.TreeMap;
 
+import com.google.common.collect.ImmutableList;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars;
@@ -104,6 +105,11 @@ public String getAggregator(Configuration conf) {
    */
   public static final String RAW_DATA_SIZE = "rawDataSize";
 
+  /**
+   * The name of the statistic for Number of Erasure Coded Files - to be published or gathered.
+   */
+  public static final String NUM_ERASURE_CODED_FILES = "numFilesErasureCoded";
+
   /**
    * Temp dir for writing stats from tasks.
    */
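Once quick stats have been gathered, the new counter is just another entry in the table-parameters map, so it is also visible through the metastore Thrift client. An illustrative read (not part of the patch; assumes `hive.metastore.uris` is set in the loaded configuration):

```java
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Table;

public class ReadEcFileCount {
  public static void main(String[] args) throws Exception {
    HiveMetaStoreClient client = new HiveMetaStoreClient(new HiveConf());
    try {
      Table table = client.getTable("default", "src");
      // After this patch the basic-stats parameters carry the new key.
      System.out.println(table.getParameters().get("numFilesErasureCoded"));
    } finally {
      client.close();
    }
  }
}
```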
@@ -113,18 +119,20 @@ public String getAggregator(Configuration conf) {
   /**
    * List of all supported statistics
    */
-  public static final String[] supportedStats = {NUM_FILES,ROW_COUNT,TOTAL_SIZE,RAW_DATA_SIZE};
+  public static final List<String> SUPPORTED_STATS = ImmutableList.of(
+      NUM_FILES, ROW_COUNT, TOTAL_SIZE, RAW_DATA_SIZE, NUM_ERASURE_CODED_FILES);
 
   /**
    * List of all statistics that need to be collected during query execution. These are
    * statistics that inherently require a scan of the data.
    */
-  public static final String[] statsRequireCompute = new String[] {ROW_COUNT,RAW_DATA_SIZE};
+  public static final List<String> STATS_REQUIRE_COMPUTE = ImmutableList.of(ROW_COUNT, RAW_DATA_SIZE);
 
   /**
    * List of statistics that can be collected quickly without requiring a scan of the data.
    */
-  public static final String[] fastStats = new String[] {NUM_FILES,TOTAL_SIZE};
+  public static final List<String> FAST_STATS = ImmutableList.of(
+      NUM_FILES, TOTAL_SIZE, NUM_ERASURE_CODED_FILES);
 
   // This string constant is used to indicate to AlterHandler that
   // alterPartition/alterTable is happening via statsTask or via user.
@@ -154,8 +162,9 @@ public String getAggregator(Configuration conf) {
   public static final String FALSE = "false";
 
   // The parameter keys for the table statistics. Those keys are excluded from 'show create table' command output.
-  public static final String[] TABLE_PARAMS_STATS_KEYS = new String[] {
-      COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE,ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS};
+  public static final List<String> TABLE_PARAMS_STATS_KEYS = ImmutableList.of(
+      COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE, ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS,
+      NUM_ERASURE_CODED_FILES);
 
   private static class ColumnStatsAccurate {
     private static ObjectReader objectReader;
@@ -299,7 +308,7 @@ public static void removeColumnStatsState(Map<String, String> params, List<String
       params, List<String> cols, String setting) {
     if (TRUE.equals(setting)) {
-      for (String stat : StatsSetupConst.supportedStats) {
+      for (String stat : StatsSetupConst.SUPPORTED_STATS) {
         params.put(stat, "0");
       }
     }
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index c6c04b757a96ae55678d0dc405794d35bda38989..31bf615674a30d9bdb2f8420fa5eac41ce3f6f09 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -18,7 +18,6 @@
 package org.apache.hadoop.hive.metastore;
 
 import static org.apache.commons.lang.StringUtils.join;
-import static org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLICATION;
 import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_DATABASE_COMMENT;
 import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_DATABASE_NAME;
 import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_CATALOG_NAME;
@@ -2674,7 +2673,7 @@ private void updateStatsForTruncate(Map<String, String> props, EnvironmentContext
     if (null == props) {
       return;
     }
-    for (String stat : StatsSetupConst.supportedStats) {
+    for (String stat : StatsSetupConst.SUPPORTED_STATS) {
      String statVal = props.get(stat);
      if (statVal != null) {
        //In the case of truncate table, we set the stats to be 0.
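To make the truncate behavior concrete: every stat named in the widened SUPPORTED_STATS list, including the new numFilesErasureCoded, is reset to "0" when it is already present. A self-contained illustration (the map stands in for the table or partition parameters; this is a sketch, not the metastore code itself):

```java
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hive.common.StatsSetupConst;

public class TruncateStatsReset {
  // Mirrors what updateStatsForTruncate does: zero each supported stat if present.
  static void zeroBasicStats(Map<String, String> params) {
    for (String stat : StatsSetupConst.SUPPORTED_STATS) {
      if (params.containsKey(stat)) {
        params.put(stat, "0");
      }
    }
  }

  public static void main(String[] args) {
    Map<String, String> params = new HashMap<>();
    params.put(StatsSetupConst.NUM_ERASURE_CODED_FILES, "4");
    zeroBasicStats(params);
    System.out.println(params); // {numFilesErasureCoded=0}
  }
}
```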
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
index cbe89b682775f0b65bf62b546ec586bfefdca363..73924ee007505f79561b5408d47f1f0184ff9f25 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
@@ -95,7 +95,6 @@
 import java.util.Map.Entry;
 import java.util.SortedMap;
 import java.util.SortedSet;
-import java.util.StringJoiner;
 import java.util.TimeZone;
 import java.util.TreeMap;
 import java.util.TreeSet;
@@ -628,7 +627,7 @@ public static boolean isView(Table table) {
    * @return True if the passed Parameters Map contains values for all "Fast Stats".
    */
   private static boolean containsAllFastStats(Map<String, String> partParams) {
-    for (String stat : StatsSetupConst.fastStats) {
+    for (String stat : StatsSetupConst.FAST_STATS) {
       if (!partParams.containsKey(stat)) {
         return false;
       }
@@ -639,7 +638,7 @@ public static boolean isFastStatsSame(Partition oldPart, Partition newPart) {
     // requires to calculate stats if new and old have different fast stats
     if ((oldPart != null) && (oldPart.getParameters() != null)) {
-      for (String stat : StatsSetupConst.fastStats) {
+      for (String stat : StatsSetupConst.FAST_STATS) {
         if (oldPart.getParameters().containsKey(stat)) {
           Long oldStat = Long.parseLong(oldPart.getParameters().get(stat));
           Long newStat = Long.parseLong(newPart.getParameters().get(stat));
@@ -720,20 +719,26 @@ public static void populateQuickStats(List<FileStatus> fileStatus, Map<String, String> params) {
     params.remove(StatsSetupConst.NUM_FILES);
     params.remove(StatsSetupConst.TOTAL_SIZE);
+    params.remove(StatsSetupConst.NUM_ERASURE_CODED_FILES);
   }
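The quick-stats path is what feeds these parameters. A hypothetical, condensed version of the gathering that populateQuickStats performs, counting erasure-coded files via the standard FileStatus#isErasureCoded() accessor (the method name fillQuickStats and the exact aggregation details are illustrative, not the verbatim Hive implementation):

```java
import java.util.List;
import java.util.Map;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hive.common.StatsSetupConst;

public class QuickStatsSketch {
  static void fillQuickStats(List<FileStatus> files, Map<String, String> params) {
    int numFiles = 0;
    int numErasureCoded = 0;
    long totalSize = 0;
    for (FileStatus status : files) {
      if (!status.isDirectory()) {        // directories are not counted
        numFiles++;
        totalSize += status.getLen();
        if (status.isErasureCoded()) {    // the new counter added by this patch
          numErasureCoded++;
        }
      }
    }
    params.put(StatsSetupConst.NUM_FILES, String.valueOf(numFiles));
    params.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(totalSize));
    params.put(StatsSetupConst.NUM_ERASURE_CODED_FILES, String.valueOf(numErasureCoded));
  }
}
```

This matches the expectation in the test below: one directory, one erasure-coded file, and one plain file yield numFiles=2 and numFilesErasureCoded=1.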
diff --git standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java
index 55ff1502d415dea52095cfdd523d01f1e49ce084..d5ae5d1c0d4658335af92e4472bd3985d9f9493f 100644
--- standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java
+++ standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java
@@ -21,6 +21,7 @@
 import com.google.common.collect.ImmutableMap;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest;
@@ -43,6 +44,7 @@
 import static org.apache.hadoop.hive.common.StatsSetupConst.COLUMN_STATS_ACCURATE;
 import static org.apache.hadoop.hive.common.StatsSetupConst.NUM_FILES;
+import static org.apache.hadoop.hive.common.StatsSetupConst.NUM_ERASURE_CODED_FILES;
 import static org.apache.hadoop.hive.common.StatsSetupConst.STATS_GENERATED;
 import static org.apache.hadoop.hive.common.StatsSetupConst.TOTAL_SIZE;
 import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.updateTableStatsSlow;
@@ -60,7 +62,11 @@
 
   private static final String DB_NAME = "db1";
   private static final String TABLE_NAME = "tbl1";
-  private final Map<String, String> paramsWithStats = ImmutableMap.of(NUM_FILES, "1", TOTAL_SIZE, "2");
+  private final Map<String, String> paramsWithStats = ImmutableMap.of(
+      NUM_FILES, "1",
+      TOTAL_SIZE, "2",
+      NUM_ERASURE_CODED_FILES, "0"
+  );
 
   private Database db;
@@ -120,7 +126,7 @@ public void testcolumnsIncludedByNameType() {
    * <li>Create database
    * <li>Create unpartitioned table
    * <li>Create unpartitioned table which has params
-   * <li>Call updateTableStatsSlow with arguments which should caue stats calculation
+   * <li>Call updateTableStatsSlow with arguments which should cause stats calculation
    * <li>Verify table statistics using mocked warehouse
    * <li>Create table which already have stats
    * <li>Call updateTableStatsSlow forcing stats recompute
@@ -141,18 +147,17 @@ public void testUpdateTableStatsSlow_statsUpdated() throws TException {
     // Set up mock warehouse
-    FileStatus fs1 = new FileStatus(1, true, 2, 3,
-        4, new Path("/tmp/0"));
-    FileStatus fs2 = new FileStatus(fileLength, false, 3, 4,
-        5, new Path("/tmp/1"));
-    FileStatus fs3 = new FileStatus(fileLength, false, 3, 4,
-        5, new Path("/tmp/1"));
+    FileStatus fs1 = getFileStatus(1, true, 2, 3, 4, "/tmp/0", false);
+    FileStatus fs2 = getFileStatus(fileLength, false, 3, 4, 5, "/tmp/1", true);
+    FileStatus fs3 = getFileStatus(fileLength, false, 3, 4, 5, "/tmp/1", false);
     List<FileStatus> fileStatus = Arrays.asList(fs1, fs2, fs3);
     Warehouse wh = mock(Warehouse.class);
     when(wh.getFileStatusesForUnpartitionedTable(db, tbl)).thenReturn(fileStatus);
 
     Map<String, String> expected = ImmutableMap.of(NUM_FILES, "2",
-        TOTAL_SIZE, String.valueOf(2 * fileLength));
+        TOTAL_SIZE, String.valueOf(2 * fileLength),
+        NUM_ERASURE_CODED_FILES, "1"
+    );
     updateTableStatsSlow(db, tbl, wh, false, false, null);
     assertThat(tbl.getParameters(), is(expected));
@@ -195,6 +200,7 @@ public void testUpdateTableStatsSlow_statsUpdated() throws TException {
 
     Map<String, String> expected1 = ImmutableMap.of(NUM_FILES, "2",
         TOTAL_SIZE, String.valueOf(2 * fileLength),
+        NUM_ERASURE_CODED_FILES, "1",
         COLUMN_STATS_ACCURATE, "{\"BASIC_STATS\":\"true\"}");
     assertThat(tbl3.getParameters(), is(expected1));
   }
@@ -227,7 +233,7 @@ public void testUpdateTableStatsSlow_removesDoNotUpdateStats() throws TException
   }
 
   /**
-   * Verify that updateTableStatsSlow() does not calculate tabe statistics when
+   * Verify that updateTableStatsSlow() does not calculate table statistics when
    * <ol>
    * <li>newDir is true
    * <li>Table is partitioned
@@ -270,5 +276,16 @@ public void testUpdateTableStatsSlow_doesNotUpdateStats() throws TException {
     updateTableStatsSlow(db, tbl2, wh, false, false, null);
     verify(wh, never()).getFileStatusesForUnpartitionedTable(db, tbl2);
   }
+
+  /**
+   * Build a FileStatus object.
+   */
+  private static FileStatus getFileStatus(long fileLength, boolean isdir, int blockReplication,
+      int blockSize, int modificationTime, String pathString, boolean isErasureCoded) {
+    return new FileStatus(fileLength, isdir, blockReplication, blockSize, modificationTime,
+        0L, (FsPermission) null, (String) null, (String) null, null,
+        new Path(pathString), false, false, isErasureCoded);
+  }
+
 }
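The 14-argument FileStatus constructor used by this helper is the stock Hadoop one, and its trailing flag is what FileStatus#isErasureCoded() later reports back to updateTableStatsSlow. A standalone check (illustrative only):

```java
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class FileStatusEcFlagCheck {
  public static void main(String[] args) {
    // Same constructor shape as the test helper above; the last boolean marks
    // the file as erasure coded.
    FileStatus ec = new FileStatus(100L, false, 3, 4L, 5L, 0L, (FsPermission) null,
        (String) null, (String) null, null, new Path("/tmp/1"), false, false, true);
    System.out.println(ec.isErasureCoded()); // prints: true
  }
}
```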