diff --git beeline/src/java/org/apache/hive/beeline/hs2connection/BeelineSiteParser.java beeline/src/java/org/apache/hive/beeline/hs2connection/BeelineSiteParser.java index 600d84e..4c55104 100644 --- beeline/src/java/org/apache/hive/beeline/hs2connection/BeelineSiteParser.java +++ beeline/src/java/org/apache/hive/beeline/hs2connection/BeelineSiteParser.java @@ -63,7 +63,7 @@ public BeelineSiteParser() { locations .add(System.getenv("HIVE_CONF_DIR") + File.separator + DEFAULT_BEELINE_SITE_FILE_NAME); } - locations.add(ETC_HIVE_CONF_LOCATION + DEFAULT_BEELINE_SITE_FILE_NAME); + locations.add(ETC_HIVE_CONF_LOCATION + File.separator + DEFAULT_BEELINE_SITE_FILE_NAME); } @VisibleForTesting diff --git beeline/src/java/org/apache/hive/beeline/hs2connection/UserHS2ConnectionFileParser.java beeline/src/java/org/apache/hive/beeline/hs2connection/UserHS2ConnectionFileParser.java index 9d45daf..47dee4c 100644 --- beeline/src/java/org/apache/hive/beeline/hs2connection/UserHS2ConnectionFileParser.java +++ beeline/src/java/org/apache/hive/beeline/hs2connection/UserHS2ConnectionFileParser.java @@ -56,7 +56,7 @@ public UserHS2ConnectionFileParser() { locations.add( System.getenv("HIVE_CONF_DIR") + File.separator + DEFAULT_CONNECTION_CONFIG_FILE_NAME); } - locations.add(ETC_HIVE_CONF_LOCATION + DEFAULT_CONNECTION_CONFIG_FILE_NAME); + locations.add(ETC_HIVE_CONF_LOCATION + File.separator + DEFAULT_CONNECTION_CONFIG_FILE_NAME); } @VisibleForTesting diff --git beeline/src/test/org/apache/hive/beeline/hs2connection/TestBeelineSiteParser.java beeline/src/test/org/apache/hive/beeline/hs2connection/TestBeelineSiteParser.java new file mode 100644 index 0000000..fc2b44d --- /dev/null +++ beeline/src/test/org/apache/hive/beeline/hs2connection/TestBeelineSiteParser.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hive.beeline.hs2connection; + +import java.io.File; +import java.lang.reflect.Field; +import java.util.Collection; +import java.util.List; +import org.junit.Assert; +import org.junit.Test; + +public class TestBeelineSiteParser { + @Test + public void testConfigLocationPathInEtc() throws Exception { + BeelineSiteParser testHS2ConfigManager = + new BeelineSiteParser(); + Field locations = testHS2ConfigManager.getClass().getDeclaredField("locations"); + locations.setAccessible(true); + Collection locs = (List)locations.get(testHS2ConfigManager); + Assert.assertTrue(locs.contains( + BeelineSiteParser.ETC_HIVE_CONF_LOCATION + + File.separator + + BeelineSiteParser.DEFAULT_BEELINE_SITE_FILE_NAME)); + + } +} diff --git beeline/src/test/org/apache/hive/beeline/hs2connection/TestUserHS2ConnectionFileParser.java beeline/src/test/org/apache/hive/beeline/hs2connection/TestUserHS2ConnectionFileParser.java index f5923d1..78c3a77 100644 --- beeline/src/test/org/apache/hive/beeline/hs2connection/TestUserHS2ConnectionFileParser.java +++ beeline/src/test/org/apache/hive/beeline/hs2connection/TestUserHS2ConnectionFileParser.java @@ -18,7 +18,9 @@ package org.apache.hive.beeline.hs2connection; import java.io.File; +import java.lang.reflect.Field; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import org.apache.hive.beeline.hs2connection.BeelineHS2ConnectionFileParseException; @@ -171,6 +173,20 @@ public void testGetLocationOrder() throws Exception { LOCATION_2.equals(testHS2ConfigManager.getFileLocation())); } + @Test + public void testConfigLocationPathInEtc() throws Exception { + UserHS2ConnectionFileParser testHS2ConfigManager = + new UserHS2ConnectionFileParser(); + Field locations = testHS2ConfigManager.getClass().getDeclaredField("locations"); + locations.setAccessible(true); + Collection locs = (List)locations.get(testHS2ConfigManager); + Assert.assertTrue(locs.contains( + UserHS2ConnectionFileParser.ETC_HIVE_CONF_LOCATION + + File.separator + + UserHS2ConnectionFileParser.DEFAULT_CONNECTION_CONFIG_FILE_NAME)); + + } + private String getParsedUrlFromConfigFile(String filename) throws BeelineHS2ConnectionFileParseException { String path = HiveTestUtils.getFileFromClasspath(filename); diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java index d1f0d98..400262a 100644 --- druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java +++ druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java @@ -181,7 +181,7 @@ private SegmentIdentifier getSegmentIdentifierAndMaybePush(long truncatedTime) { private void pushSegments(List segmentsToPush) { try { SegmentsAndMetadata segmentsAndMetadata = appenderator - .push(segmentsToPush, committerSupplier.get()).get(); + .push(segmentsToPush, committerSupplier.get(), false).get(); final HashSet pushedSegmentIdentifierHashSet = new HashSet<>(); for (DataSegment pushedSegment : segmentsAndMetadata.getSegments()) { diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 9e012ce..b928222 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -456,6 +456,8 @@ minillaplocal.query.files=\ bucketmapjoin6.q,\ bucketmapjoin7.q,\ bucketpruning1.q,\ + reopt_dpp.q,\ + reopt_semijoin.q,\ retry_failure.q,\ retry_failure_stat_changes.q,\ 
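The two new tests above read the parsers' private locations list via reflection instead of widening its visibility. Below is a minimal standalone sketch of that reflection pattern; the ConfigLocator class and the literal "/etc/hive/conf" value are assumptions made to keep the example self-contained, not part of the patch.

import java.io.File;
import java.lang.reflect.Field;
import java.util.Arrays;
import java.util.List;

public class PrivateFieldCheck {
  // Hypothetical stand-in for BeelineSiteParser / UserHS2ConnectionFileParser.
  static class ConfigLocator {
    private final List<String> locations =
        Arrays.asList("/etc/hive/conf" + File.separator + "beeline-site.xml");
  }

  @SuppressWarnings("unchecked")
  static List<String> readLocations(Object target) throws Exception {
    Field f = target.getClass().getDeclaredField("locations");
    f.setAccessible(true);                 // bypass private access, test-only
    return (List<String>) f.get(target);
  }

  public static void main(String[] args) throws Exception {
    List<String> locs = readLocations(new ConfigLocator());
    if (!locs.contains("/etc/hive/conf" + File.separator + "beeline-site.xml")) {
      throw new AssertionError("expected config path not registered");
    }
    System.out.println("locations = " + locs);
  }
}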
retry_failure_oom.q,\ @@ -504,6 +506,7 @@ minillaplocal.query.files=\ explainanalyze_2.q,\ explainuser_1.q,\ explainuser_4.q,\ + external_jdbc_table.q,\ groupby2.q,\ groupby_groupingset_bug.q,\ hybridgrace_hashjoin_1.q,\ @@ -610,6 +613,7 @@ minillaplocal.query.files=\ results_cache_quoted_identifiers.q,\ results_cache_temptable.q,\ results_cache_transactional.q,\ + results_cache_truncate.q,\ results_cache_with_masking.q,\ sample10.q,\ sample10_mm.q,\ diff --git itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidatorForTest.java itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidatorForTest.java index d883e4b..92c1292 100644 --- itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidatorForTest.java +++ itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidatorForTest.java @@ -119,7 +119,7 @@ public boolean needTransform() { List privObjs) throws SemanticException { List needRewritePrivObjs = new ArrayList<>(); for (HivePrivilegeObject privObj : privObjs) { - if (privObj.getObjectName().equals("masking_test")) { + if (privObj.getObjectName().equals("masking_test") || privObj.getObjectName().startsWith("masking_test_n")) { privObj.setRowFilterExpression("key % 2 = 0 and key < 10"); List cellValueTransformers = new ArrayList<>(); for (String columnName : privObj.getColumns()) { @@ -131,7 +131,7 @@ public boolean needTransform() { } privObj.setCellValueTransformers(cellValueTransformers); needRewritePrivObjs.add(privObj); - } else if (privObj.getObjectName().equals("masking_test_view")) { + } else if (privObj.getObjectName().equals("masking_test_view") || privObj.getObjectName().startsWith("masking_test_view_n")) { privObj.setRowFilterExpression("key > 6"); List cellValueTransformers = new ArrayList<>(); for (String columnName : privObj.getColumns()) { @@ -143,14 +143,14 @@ public boolean needTransform() { } privObj.setCellValueTransformers(cellValueTransformers); needRewritePrivObjs.add(privObj); - } else if (privObj.getObjectName().equals("masking_test_subq")) { + } else if (privObj.getObjectName().equals("masking_test_subq") || privObj.getObjectName().startsWith("masking_test_subq_n")) { privObj - .setRowFilterExpression("key in (select key from src where src.key = masking_test_subq.key)"); + .setRowFilterExpression("key in (select key from src where src.key = " + privObj.getObjectName() + ".key)"); needRewritePrivObjs.add(privObj); - } else if (privObj.getObjectName().equals("masking_acid_no_masking")) { + } else if (privObj.getObjectName().equals("masking_acid_no_masking") || privObj.getObjectName().startsWith("masking_acid_no_masking_n")) { // testing acid usage when no masking/filtering is present needRewritePrivObjs.add(privObj); - } else if (privObj.getObjectName().equals("masking_test_druid")) { + } else if (privObj.getObjectName().equals("masking_test_druid") || privObj.getObjectName().startsWith("masking_test_druid_n")) { // testing druid queries row filtering is present privObj.setRowFilterExpression("key > 10"); needRewritePrivObjs.add(privObj); diff --git jdbc-handler/src/main/java/org/apache/hive/storage/jdbc/JdbcSerDe.java jdbc-handler/src/main/java/org/apache/hive/storage/jdbc/JdbcSerDe.java index f5472a0..2e0250d 100644 --- jdbc-handler/src/main/java/org/apache/hive/storage/jdbc/JdbcSerDe.java +++ 
jdbc-handler/src/main/java/org/apache/hive/storage/jdbc/JdbcSerDe.java @@ -15,6 +15,7 @@ package org.apache.hive.storage.jdbc; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; @@ -66,6 +67,7 @@ public void initialize(Configuration conf, Properties tbl) throws SerDeException LOGGER.trace("Initializing the SerDe"); if (tbl.containsKey(JdbcStorageConfig.DATABASE_TYPE.getPropertyName())) { + final boolean hiveQueryExecution = tbl.containsKey(Constants.HIVE_JDBC_QUERY); Configuration tableConfig = JdbcStorageConfigManager.convertPropertiesToConfiguration(tbl); @@ -73,17 +75,24 @@ public void initialize(Configuration conf, Properties tbl) throws SerDeException columnNames = dbAccessor.getColumnNames(tableConfig); numColumns = columnNames.size(); List hiveColumnNames; - - String[] hiveColumnNameArray = parseProperty(tbl.getProperty(serdeConstants.LIST_COLUMNS), ","); - if (numColumns != hiveColumnNameArray.length) { - throw new SerDeException("Expected " + numColumns + " columns. Table definition has " - + hiveColumnNameArray.length + " columns"); - } - hiveColumnNames = Arrays.asList(hiveColumnNameArray); - - hiveColumnTypeArray = parseProperty(tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES), ":"); - if (hiveColumnTypeArray.length == 0) { - throw new SerDeException("Received an empty Hive column type definition"); + if (hiveQueryExecution) { + hiveColumnNames = columnNames; + final List columnTypes = dbAccessor.getColumnTypes(tableConfig); + hiveColumnTypeArray = new String[columnTypes.size()]; + hiveColumnTypeArray = columnTypes.toArray(hiveColumnTypeArray); + } else { + + String[] hiveColumnNameArray = parseProperty(tbl.getProperty(serdeConstants.LIST_COLUMNS), ","); + if (numColumns != hiveColumnNameArray.length) { + throw new SerDeException("Expected " + numColumns + " columns. 
Table definition has " + + hiveColumnNameArray.length + " columns"); + } + hiveColumnNames = Arrays.asList(hiveColumnNameArray); + + hiveColumnTypeArray = parseProperty(tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES), ":"); + if (hiveColumnTypeArray.length == 0) { + throw new SerDeException("Received an empty Hive column type definition"); + } } List fieldInspectors = new ArrayList(numColumns); @@ -94,8 +103,8 @@ public void initialize(Configuration conf, Properties tbl) throws SerDeException } objectInspector = - ObjectInspectorFactory.getStandardStructObjectInspector(hiveColumnNames, - fieldInspectors); + ObjectInspectorFactory.getStandardStructObjectInspector(hiveColumnNames, + fieldInspectors); row = new ArrayList(numColumns); } } diff --git jdbc-handler/src/main/java/org/apache/hive/storage/jdbc/dao/DatabaseAccessor.java jdbc-handler/src/main/java/org/apache/hive/storage/jdbc/dao/DatabaseAccessor.java index f2712b8..fdaa794 100644 --- jdbc-handler/src/main/java/org/apache/hive/storage/jdbc/dao/DatabaseAccessor.java +++ jdbc-handler/src/main/java/org/apache/hive/storage/jdbc/dao/DatabaseAccessor.java @@ -24,6 +24,8 @@ List getColumnNames(Configuration conf) throws HiveJdbcDatabaseAccessException; + List getColumnTypes(Configuration conf) throws HiveJdbcDatabaseAccessException; + int getTotalNumberOfRecords(Configuration conf) throws HiveJdbcDatabaseAccessException; JdbcRecordIterator diff --git jdbc-handler/src/main/java/org/apache/hive/storage/jdbc/dao/GenericJdbcDatabaseAccessor.java jdbc-handler/src/main/java/org/apache/hive/storage/jdbc/dao/GenericJdbcDatabaseAccessor.java index af27c48..772bc5d 100644 --- jdbc-handler/src/main/java/org/apache/hive/storage/jdbc/dao/GenericJdbcDatabaseAccessor.java +++ jdbc-handler/src/main/java/org/apache/hive/storage/jdbc/dao/GenericJdbcDatabaseAccessor.java @@ -35,6 +35,7 @@ import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; +import java.sql.Types; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -99,6 +100,69 @@ protected String getMetaDataQuery(Configuration conf) { } @Override + public List getColumnTypes(Configuration conf) throws HiveJdbcDatabaseAccessException { + Connection conn = null; + PreparedStatement ps = null; + ResultSet rs = null; + + try { + initializeDatabaseConnection(conf); + String metadataQuery = getMetaDataQuery(conf); + LOGGER.debug("Query to execute is [{}]", metadataQuery); + + conn = dbcpDataSource.getConnection(); + ps = conn.prepareStatement(metadataQuery); + rs = ps.executeQuery(); + + ResultSetMetaData metadata = rs.getMetaData(); + int numColumns = metadata.getColumnCount(); + List columnTypes = new ArrayList(numColumns); + for (int i = 0; i < numColumns; i++) { + switch (metadata.getColumnType(i + 1)) { + case Types.CHAR: + columnTypes.add(serdeConstants.STRING_TYPE_NAME); + break; + case Types.INTEGER: + columnTypes.add(serdeConstants.INT_TYPE_NAME); + break; + case Types.BIGINT: + columnTypes.add(serdeConstants.BIGINT_TYPE_NAME); + break; + case Types.DECIMAL: + columnTypes.add(serdeConstants.DECIMAL_TYPE_NAME); + break; + case Types.FLOAT: + case Types.REAL: + columnTypes.add(serdeConstants.FLOAT_TYPE_NAME); + break; + case Types.DOUBLE: + columnTypes.add(serdeConstants.DOUBLE_TYPE_NAME); + break; + case Types.DATE: + columnTypes.add(serdeConstants.DATE_TYPE_NAME); + break; + case Types.TIMESTAMP: + columnTypes.add(serdeConstants.TIMESTAMP_TYPE_NAME); + break; + + default: + columnTypes.add(metadata.getColumnTypeName(i+1)); + break; + } + } + 
+ return columnTypes; + } catch (Exception e) { + LOGGER.error("Error while trying to get column names.", e); + throw new HiveJdbcDatabaseAccessException("Error while trying to get column names: " + e.getMessage(), e); + } finally { + cleanupResources(conn, ps, rs); + } + + } + + + @Override public int getTotalNumberOfRecords(Configuration conf) throws HiveJdbcDatabaseAccessException { Connection conn = null; PreparedStatement ps = null; @@ -153,7 +217,7 @@ public int getTotalNumberOfRecords(Configuration conf) throws HiveJdbcDatabaseAc ps.setFetchSize(getFetchSize(conf)); rs = ps.executeQuery(); - return new JdbcRecordIterator(conn, ps, rs, conf.get(serdeConstants.LIST_COLUMN_TYPES)); + return new JdbcRecordIterator(conn, ps, rs); } catch (Exception e) { LOGGER.error("Caught exception while trying to execute query", e); diff --git jdbc-handler/src/main/java/org/apache/hive/storage/jdbc/dao/JdbcRecordIterator.java jdbc-handler/src/main/java/org/apache/hive/storage/jdbc/dao/JdbcRecordIterator.java index d6c2736..13cc6d6 100644 --- jdbc-handler/src/main/java/org/apache/hive/storage/jdbc/dao/JdbcRecordIterator.java +++ jdbc-handler/src/main/java/org/apache/hive/storage/jdbc/dao/JdbcRecordIterator.java @@ -41,15 +41,12 @@ private Connection conn; private PreparedStatement ps; private ResultSet rs; - private ArrayList columnTypes = null; - public JdbcRecordIterator(Connection conn, PreparedStatement ps, ResultSet rs, String typeString) { + + public JdbcRecordIterator(Connection conn, PreparedStatement ps, ResultSet rs) { this.conn = conn; this.ps = ps; this.rs = rs; - if (typeString != null) { - this.columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(typeString); - } } @@ -74,10 +71,10 @@ public boolean hasNext() { for (int i = 0; i < numColumns; i++) { String key = metadata.getColumnName(i + 1); Object value; - if (columnTypes!=null && columnTypes.get(i) instanceof PrimitiveTypeInfo) { - // This is not a complete list, barely make information schema work - switch (((PrimitiveTypeInfo)columnTypes.get(i)).getTypeName()) { + // This is not a complete list, barely make information schema work + switch (metadata.getColumnTypeName(i+1).toLowerCase()) { case "int": + case "integer": case "smallint": case "tinyint": value = rs.getInt(i + 1); @@ -100,9 +97,11 @@ public boolean hasNext() { case "string": case "char": case "varchar": + case "long varchar": value = rs.getString(i + 1); break; case "datetime": + case "time": value = rs.getDate(i + 1); break; case "timestamp": @@ -111,10 +110,8 @@ public boolean hasNext() { default: value = rs.getObject(i + 1); break; - } - } else { - value = rs.getObject(i + 1); } + record.put(key, value); } diff --git pom.xml pom.xml index 28ad152..604ee76 100644 --- pom.xml +++ pom.xml @@ -143,7 +143,7 @@ 10.14.1.0 3.1.0 0.1.2 - 0.12.0 + 0.12.1 1.2.0-3f79e055 19.0 2.4.11 diff --git ql/src/java/org/apache/hadoop/hive/llap/LlapOutputFormatService.java ql/src/java/org/apache/hadoop/hive/llap/LlapOutputFormatService.java index c71c637..996f8b3 100644 --- ql/src/java/org/apache/hadoop/hive/llap/LlapOutputFormatService.java +++ ql/src/java/org/apache/hadoop/hive/llap/LlapOutputFormatService.java @@ -199,10 +199,12 @@ private void registerReader(ChannelHandlerContext ctx, String id, byte[] tokenBy int maxPendingWrites = HiveConf.getIntVar(conf, HiveConf.ConfVars.LLAP_DAEMON_OUTPUT_SERVICE_MAX_PENDING_WRITES); boolean useArrow = HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_OUTPUT_FORMAT_ARROW); + long allocatorMax = HiveConf.getLongVar(conf, + 
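The new getColumnTypes() above derives Hive type names from java.sql.Types so a query-backed JDBC table no longer depends on LIST_COLUMN_TYPES in its table properties. Here is a self-contained sketch of the same kind of mapping against plain JDBC; the type-name strings are written out literally instead of going through serdeConstants, and the in-memory Derby URL is only a placeholder that assumes the Derby embedded driver is on the classpath.

import java.sql.*;
import java.util.ArrayList;
import java.util.List;

public class JdbcTypeMapping {
  /** Maps the result-set metadata of a query to Hive-style type names. */
  static List<String> hiveTypesFor(ResultSet rs) throws SQLException {
    ResultSetMetaData md = rs.getMetaData();
    List<String> types = new ArrayList<>(md.getColumnCount());
    for (int i = 1; i <= md.getColumnCount(); i++) {
      switch (md.getColumnType(i)) {
        case Types.CHAR:      types.add("string");    break;
        case Types.INTEGER:   types.add("int");       break;
        case Types.BIGINT:    types.add("bigint");    break;
        case Types.DECIMAL:   types.add("decimal");   break;
        case Types.FLOAT:
        case Types.REAL:      types.add("float");     break;
        case Types.DOUBLE:    types.add("double");    break;
        case Types.DATE:      types.add("date");      break;
        case Types.TIMESTAMP: types.add("timestamp"); break;
        default:              types.add(md.getColumnTypeName(i).toLowerCase()); break;
      }
    }
    return types;
  }

  public static void main(String[] args) throws Exception {
    try (Connection c = DriverManager.getConnection("jdbc:derby:memory:demo;create=true");
         Statement s = c.createStatement()) {
      s.executeUpdate("CREATE TABLE T (ikey INTEGER, fkey REAL, dkey DOUBLE)");
      try (ResultSet rs = s.executeQuery("SELECT * FROM T")) {
        System.out.println(hiveTypesFor(rs));   // prints [int, float, double]
      }
    }
  }
}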
HiveConf.ConfVars.HIVE_ARROW_ROOT_ALLOCATOR_LIMIT); @SuppressWarnings("rawtypes") RecordWriter writer = null; if(useArrow) { - writer = new LlapArrowRecordWriter(new WritableByteChannelAdapter(ctx, maxPendingWrites, id)); + writer = new LlapArrowRecordWriter(new WritableByteChannelAdapter(ctx, maxPendingWrites, id, allocatorMax)); } else { writer = new LlapRecordWriter(id, new ChunkedOutputStream( diff --git ql/src/java/org/apache/hadoop/hive/llap/WritableByteChannelAdapter.java ql/src/java/org/apache/hadoop/hive/llap/WritableByteChannelAdapter.java index 57da1d9..753da22 100644 --- ql/src/java/org/apache/hadoop/hive/llap/WritableByteChannelAdapter.java +++ ql/src/java/org/apache/hadoop/hive/llap/WritableByteChannelAdapter.java @@ -18,7 +18,8 @@ package org.apache.hadoop.hive.llap; -import io.netty.buffer.Unpooled; +import org.apache.hadoop.hive.ql.io.arrow.RootAllocatorFactory; +import io.netty.buffer.ByteBuf; import io.netty.channel.ChannelHandlerContext; import java.io.IOException; @@ -48,6 +49,7 @@ private final Semaphore writeResources; private boolean closed = false; private final String id; + private long allocatorMax; private ChannelFutureListener writeListener = new ChannelFutureListener() { @Override @@ -75,11 +77,12 @@ public void operationComplete(ChannelFuture future) { } }; - public WritableByteChannelAdapter(ChannelHandlerContext chc, int maxPendingWrites, String id) { + public WritableByteChannelAdapter(ChannelHandlerContext chc, int maxPendingWrites, String id, long allocatorMax) { this.chc = chc; this.maxPendingWrites = maxPendingWrites; this.writeResources = new Semaphore(maxPendingWrites); this.id = id; + this.allocatorMax = allocatorMax; } @Override @@ -87,7 +90,9 @@ public int write(ByteBuffer src) throws IOException { int size = src.remaining(); //Down the semaphore or block until available takeWriteResources(1); - chc.writeAndFlush(Unpooled.wrappedBuffer(src)).addListener(writeListener); + ByteBuf buf = RootAllocatorFactory.INSTANCE.getOrCreateRootAllocator(allocatorMax).buffer(size); + buf.writeBytes(src); + chc.writeAndFlush(buf).addListener(writeListener); return size; } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index d177e3f..889bd58 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -147,6 +147,12 @@ public Object next(Object previous) throws IOException { public boolean nextBatch(VectorizedRowBatch theirBatch) throws IOException { // If the user hasn't been reading by row, use the fast path. 
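The WritableByteChannelAdapter change above stops wrapping the caller's ByteBuffer with Unpooled and instead copies it into a buffer obtained from the size-limited Arrow root allocator before flushing it to the channel. A rough standalone sketch of that copy-then-flush pattern follows; it substitutes Netty's own pooled allocator for RootAllocatorFactory purely to stay self-contained, and the AllocatorBackedWriter name is illustrative.

import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufAllocator;
import io.netty.buffer.PooledByteBufAllocator;
import io.netty.channel.Channel;
import io.netty.channel.ChannelFutureListener;

import java.nio.ByteBuffer;

public final class AllocatorBackedWriter {
  private final Channel channel;
  private final ByteBufAllocator allocator = PooledByteBufAllocator.DEFAULT;

  public AllocatorBackedWriter(Channel channel) {
    this.channel = channel;
  }

  /** Copies src into an allocator-owned buffer and flushes it; returns bytes written. */
  public int write(ByteBuffer src) {
    int size = src.remaining();
    ByteBuf buf = allocator.buffer(size);   // buffer is owned and tracked by the allocator
    buf.writeBytes(src);                    // copy, so the caller may reuse src immediately
    channel.writeAndFlush(buf)
        .addListener(ChannelFutureListener.FIRE_EXCEPTION_ON_FAILURE);
    return size;
  }
}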
if (rowInBatch >= batch.size) { + if (batch.size > 0) { + // the local batch has been consumed entirely, reset it + batch.reset(); + } + baseRow = super.getRowNumber(); + rowInBatch = 0; return super.nextBatch(theirBatch); } copyIntoBatch(theirBatch, batch, rowInBatch); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index 9ad4689..b6825ae 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -1478,7 +1478,13 @@ private void analyzeTruncateTable(ASTNode ast) throws SemanticException { } } - TruncateTableDesc truncateTblDesc = new TruncateTableDesc(tableName, partSpec, null); + TruncateTableDesc truncateTblDesc = new TruncateTableDesc(tableName, partSpec, null, table); + if(truncateTblDesc.mayNeedWriteId()) { + if(this.ddlDescWithWriteId != null) { + throw new IllegalStateException("ddlDescWithWriteId is already set: " + this.ddlDescWithWriteId); + } + this.ddlDescWithWriteId = truncateTblDesc; + } DDLWork ddlWork = new DDLWork(getInputs(), getOutputs(), truncateTblDesc); Task truncateTask = TaskFactory.get(ddlWork); @@ -1757,6 +1763,9 @@ else if(entry.getKey().equals("external") && entry.getValue().equals("true")){ DDLWork ddlWork = new DDLWork(getInputs(), getOutputs(), alterTblDesc); if (isPotentialMmSwitch) { + if(this.ddlDescWithWriteId != null) { + throw new IllegalStateException("ddlDescWithWriteId is already set: " + this.ddlDescWithWriteId); + } this.ddlDescWithWriteId = alterTblDesc; ddlWork.setNeedLock(true); // Hmm... why don't many other operations here need locks? } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 43ad7dd..7a63cc4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -126,6 +126,7 @@ KW_SMALLINT: 'SMALLINT'; KW_INT: 'INT' | 'INTEGER'; KW_BIGINT: 'BIGINT'; KW_FLOAT: 'FLOAT'; +KW_REAL: 'REAL'; KW_DOUBLE: 'DOUBLE'; KW_PRECISION: 'PRECISION'; KW_DATE: 'DATE'; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 324c804..75a25d5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -123,6 +123,7 @@ TOK_INT; TOK_BIGINT; TOK_BOOLEAN; TOK_FLOAT; +TOK_REAL; TOK_DOUBLE; TOK_DATE; TOK_DATELITERAL; @@ -517,6 +518,7 @@ import org.apache.hadoop.hive.conf.HiveConf; xlateMap.put("KW_INT", "INT"); xlateMap.put("KW_BIGINT", "BIGINT"); xlateMap.put("KW_FLOAT", "FLOAT"); + xlateMap.put("KW_REAL", "REAL"); xlateMap.put("KW_DOUBLE", "DOUBLE"); xlateMap.put("KW_PRECISION", "PRECISION"); xlateMap.put("KW_DATE", "DATE"); @@ -2522,6 +2524,7 @@ primitiveType | KW_BIGINT -> TOK_BIGINT | KW_BOOLEAN -> TOK_BOOLEAN | KW_FLOAT -> TOK_FLOAT + | KW_REAL -> TOK_FLOAT | KW_DOUBLE KW_PRECISION? 
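The RecordReaderImpl.nextBatch() change above resets the locally buffered batch and re-bases the reader's row number before falling back to the fast batch path. The toy model below shows why that re-basing matters when row-at-a-time and batch reads are mixed; all names are illustrative and the "source" simply hands out consecutive integers.

import java.util.Arrays;

public final class RowAndBatchReader {
  private static final int SOURCE_ROWS = 7;   // the source yields rows 0..6
  private static final int BATCH = 3;

  private int sourceNext = 0;        // next row number the source will produce

  private int[] batch = new int[0];  // local buffer used only by the row API
  private int rowInBatch = 0;
  private int baseRow = 0;           // source row number of batch[0]

  /** Row number of the next row the caller will see. */
  int getRowNumber() {
    return baseRow + rowInBatch;
  }

  /** Row-at-a-time API, served from the local buffer. */
  Integer next() {
    if (rowInBatch >= batch.length) {
      if (sourceNext >= SOURCE_ROWS) {
        return null;
      }
      baseRow = sourceNext;
      int n = Math.min(BATCH, SOURCE_ROWS - sourceNext);
      batch = new int[n];
      for (int i = 0; i < n; i++) {
        batch[i] = sourceNext++;     // this toy source just returns row numbers
      }
      rowInBatch = 0;
    }
    return batch[rowInBatch++];
  }

  /** Batch API: once the local buffer is drained, drop it and re-base the counters. */
  int[] nextBatch() {
    if (rowInBatch >= batch.length) {
      batch = new int[0];            // reset the consumed local buffer, as in the fix above
      rowInBatch = 0;
      int n = Math.min(BATCH, SOURCE_ROWS - sourceNext);
      int[] out = new int[n];
      for (int i = 0; i < n; i++) {
        out[i] = sourceNext++;
      }
      baseRow = sourceNext;          // keep getRowNumber() in step with the source
      return out;
    }
    // Otherwise serve whatever is left in the local buffer first.
    int[] out = Arrays.copyOfRange(batch, rowInBatch, batch.length);
    rowInBatch = batch.length;
    return out;
  }

  public static void main(String[] args) {
    RowAndBatchReader r = new RowAndBatchReader();
    System.out.println(r.next() + ", " + r.next());       // 0, 1
    System.out.println(Arrays.toString(r.nextBatch()));   // [2]  (rest of the local buffer)
    System.out.println("next row: " + r.getRowNumber());  // next row: 3
    System.out.println(Arrays.toString(r.nextBatch()));   // [3, 4, 5]
  }
}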
-> TOK_DOUBLE | KW_DATE -> TOK_DATE | KW_DATETIME -> TOK_DATETIME diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 7dc6146..9e43ad5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -721,6 +721,7 @@ sysFuncNames | KW_INT | KW_BIGINT | KW_FLOAT + | KW_REAL | KW_DOUBLE | KW_BOOLEAN | KW_STRING @@ -844,5 +845,5 @@ nonReserved //The following SQL2011 reserved keywords are used as function name only, but not as identifiers. sql11ReservedKeywordsUsedAsFunctionName : - KW_IF | KW_ARRAY | KW_MAP | KW_BIGINT | KW_BINARY | KW_BOOLEAN | KW_CURRENT_DATE | KW_CURRENT_TIMESTAMP | KW_DATE | KW_DOUBLE | KW_FLOAT | KW_GROUPING | KW_INT | KW_SMALLINT | KW_TIMESTAMP + KW_IF | KW_ARRAY | KW_MAP | KW_BIGINT | KW_BINARY | KW_BOOLEAN | KW_CURRENT_DATE | KW_CURRENT_TIMESTAMP | KW_DATE | KW_DOUBLE | KW_FLOAT | KW_REAL | KW_GROUPING | KW_INT | KW_SMALLINT | KW_TIMESTAMP ; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index dfd7908..119aa92 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -93,6 +93,7 @@ import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; import org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics; import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc; import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.ColStatistics; import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc; @@ -105,8 +106,10 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.plan.TezWork; +import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; +import org.apache.hadoop.hive.ql.stats.OperatorStats; import org.apache.hadoop.hive.ql.stats.StatsUtils; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator; import org.slf4j.Logger; @@ -211,6 +214,10 @@ protected void optimizeOperatorPlan(ParseContext pCtx, Set inputs, } perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Shared scans optimization"); + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + markOperatorsWithUnstableRuntimeStats(procCtx); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "markOperatorsWithUnstableRuntimeStats"); + // need a new run of the constant folding because we might have created lots // of "and true and true" conditions. // Rather than run the full constant folding just need to shortcut AND/OR expressions @@ -1006,6 +1013,71 @@ private void removeSemiJoinIfNoStats(OptimizeTezProcContext procCtx) ogw.startWalking(topNodes, null); } + private static class MarkRuntimeStatsAsIncorrect implements NodeProcessor { + + private PlanMapper planMapper; + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) + throws SemanticException { + ParseContext pCtx = ((OptimizeTezProcContext) procCtx).parseContext; + planMapper = pCtx.getContext().getPlanMapper(); + if (nd instanceof ReduceSinkOperator) { + ReduceSinkOperator rs = (ReduceSinkOperator) nd; + SemiJoinBranchInfo sjInfo = pCtx.getRsToSemiJoinBranchInfo().get(rs); + if (sjInfo == null) { + return null; + } + walkSubtree(sjInfo.getTsOp()); + } + if (nd instanceof AppMasterEventOperator) { + AppMasterEventOperator ame = (AppMasterEventOperator) nd; + AppMasterEventDesc c = ame.getConf(); + if (c instanceof DynamicPruningEventDesc) { + DynamicPruningEventDesc dped = (DynamicPruningEventDesc) c; + mark(dped.getTableScan()); + } + } + return null; + } + + private void walkSubtree(Operator root) { + Deque> deque = new LinkedList<>(); + deque.add(root); + while (!deque.isEmpty()) { + Operator op = deque.pollLast(); + mark(op); + if (op instanceof ReduceSinkOperator) { + // Done with this branch + } else { + deque.addAll(op.getChildOperators()); + } + } + } + + private void mark(Operator op) { + planMapper.link(op, new OperatorStats.IncorrectRuntimeStatsMarker()); + } + + } + + private void markOperatorsWithUnstableRuntimeStats(OptimizeTezProcContext procCtx) throws SemanticException { + Map opRules = new LinkedHashMap(); + opRules.put( + new RuleRegExp("R1", + ReduceSinkOperator.getOperatorName() + "%"), + new MarkRuntimeStatsAsIncorrect()); + opRules.put( + new RuleRegExp("R2", + AppMasterEventOperator.getOperatorName() + "%"), + new MarkRuntimeStatsAsIncorrect()); + Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx); + List topNodes = new ArrayList(); + topNodes.addAll(procCtx.parseContext.getTopOps().values()); + GraphWalker ogw = new PreOrderOnceWalker(disp); + ogw.startWalking(topNodes, null); + } + private boolean findParallelSemiJoinBranch(Operator mapjoin, TableScanOperator bigTableTS, ParseContext parseContext, Map semijoins) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java index 0b04c0c..ec04a01 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java @@ -967,4 +967,9 @@ public boolean mayNeedWriteId() { public Long getWriteId() { return this.writeId; } + + @Override + public String toString() { + return this.getClass().getSimpleName() + " for " + getFullTableName(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java index ed68f1a..8c3d852 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java @@ -22,6 +22,9 @@ import java.util.Map; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ReplicationSpec; import org.apache.hadoop.hive.ql.plan.Explain.Level; @@ -30,25 +33,34 @@ * Truncates managed table or partition */ @Explain(displayName = "Truncate Table or Partition", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) -public class TruncateTableDesc extends DDLDesc { +public class TruncateTableDesc extends DDLDesc implements DDLDesc.DDLDescWithWriteId { private static final long serialVersionUID = 1L; private String tableName; + private String fullTableName; private Map 
partSpec; private List columnIndexes; private Path inputDir; private Path outputDir; private ListBucketingCtx lbCtx; private ReplicationSpec replicationSpec; + private long writeId = 0; + private boolean isTransactional; public TruncateTableDesc() { } public TruncateTableDesc(String tableName, Map partSpec, ReplicationSpec replicationSpec) { + this(tableName, partSpec, replicationSpec, null); + } + public TruncateTableDesc(String tableName, Map partSpec, + ReplicationSpec replicationSpec, Table table) { this.tableName = tableName; this.partSpec = partSpec; this.replicationSpec = replicationSpec; + this.isTransactional = AcidUtils.isTransactionalTable(table); + this.fullTableName = table == null ? tableName : Warehouse.getQualifiedName(table.getTTable()); } @Explain(displayName = "TableName", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) @@ -107,4 +119,23 @@ public void setLbCtx(ListBucketingCtx lbCtx) { * This can result in a "TRUNCATE IF NEWER THAN" kind of semantic */ public ReplicationSpec getReplicationSpec() { return this.replicationSpec; } + + @Override + public void setWriteId(long writeId) { + this.writeId = writeId; + } + @Override + public String getFullTableName() { + return fullTableName; + } + @Override + public boolean mayNeedWriteId() { + return isTransactional; + } + + @Override + public String toString() { + return this.getClass().getSimpleName() + " for " + getFullTableName(); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/StatsSources.java ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/StatsSources.java index 5a62046..823cb87 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/StatsSources.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/StatsSources.java @@ -95,9 +95,14 @@ public static StatsSource getStatsSourceContaining(StatsSource currentStatsSourc } LOG.debug(sb.toString()); } - if (stat.size() >= 1 && sig.size() >= 1) { - map.put(sig.get(0), stat.get(0)); + if (stat.size() < 1 || sig.size() < 1) { + continue; } + if (e.getAll(OperatorStats.IncorrectRuntimeStatsMarker.class).size() > 0) { + LOG.debug("Ignoring {}, marked with OperatorStats.IncorrectRuntimeStatsMarker", sig.get(0)); + continue; + } + map.put(sig.get(0), stat.get(0)); } return map.build(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/OperatorStats.java ql/src/java/org/apache/hadoop/hive/ql/stats/OperatorStats.java index d70bb82..0c56c82 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/OperatorStats.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/OperatorStats.java @@ -23,6 +23,11 @@ * Holds information an operator's statistics. 
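TruncateTableDesc now implements DDLDesc.DDLDescWithWriteId, so truncating a transactional table allocates a table write id, which is what lets downstream consumers such as the query results cache notice the change. A stripped-down sketch of that contract, with simplified names (WriteIdAware, TruncateDesc) standing in for the Hive classes:

public class WriteIdDemo {
  /** Contract for DDL descriptors that may need a table write id. */
  interface WriteIdAware {
    String getFullTableName();
    boolean mayNeedWriteId();
    void setWriteId(long writeId);
  }

  /** Truncate descriptor: only transactional tables request a write id. */
  static final class TruncateDesc implements WriteIdAware {
    private final String fullTableName;
    private final boolean transactional;
    private long writeId = 0;

    TruncateDesc(String fullTableName, boolean transactional) {
      this.fullTableName = fullTableName;
      this.transactional = transactional;
    }

    @Override public String getFullTableName() { return fullTableName; }
    @Override public boolean mayNeedWriteId()  { return transactional; }
    @Override public void setWriteId(long writeId) { this.writeId = writeId; }
    @Override public String toString() {
      return getClass().getSimpleName() + " for " + fullTableName + " writeId=" + writeId;
    }
  }

  public static void main(String[] args) {
    TruncateDesc desc = new TruncateDesc("default.rct1_1", true);
    if (desc.mayNeedWriteId()) {
      desc.setWriteId(42L);   // in Hive this id would come from the transaction manager
    }
    System.out.println(desc);
  }
}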
*/ public final class OperatorStats { + + /** Marker class to help with plan elements which will collect invalid statistics */ + public static class IncorrectRuntimeStatsMarker { + } + private String operatorId; private long outputRecords; @@ -67,4 +72,6 @@ public boolean equals(Object obj) { return Objects.equal(operatorId, o.operatorId) && Objects.equal(outputRecords, o.outputRecords); } + + } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java index 2071d13..aa99e57 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java @@ -154,9 +154,13 @@ private void checkVectorizedReader() throws Exception { VectorizedRowBatch batch = reader.getSchema().createRowBatchV2(); OrcStruct row = null; + long lastRowNumber = -1; // Check Vectorized ORC reader against ORC row reader while (vrr.nextBatch(batch)) { + Assert.assertEquals(lastRowNumber + 1, vrr.getRowNumber()); for (int i = 0; i < batch.size; i++) { + Assert.assertEquals(rr.getRowNumber(), vrr.getRowNumber()+i); + lastRowNumber = rr.getRowNumber(); row = (OrcStruct) rr.next(row); for (int j = 0; j < batch.cols.length; j++) { Object a = (row.getFieldValue(j)); diff --git ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java index 2ad2990..ea0f4d6 100644 --- ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java +++ ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java @@ -35,7 +35,7 @@ * ,BOOLEAN,BOTH,BY,CONSTRAINT * ,CREATE,CUBE,CURRENT_DATE,CURRENT_TIMESTAMP,CURSOR, * DATE,DECIMAL,DELETE,DESCRIBE - * ,DOUBLE,DROP,EXISTS,EXTERNAL,FALSE,FETCH,FLOAT,FOR + * ,DOUBLE,DROP,EXISTS,EXTERNAL,FALSE,FETCH,FLOAT,REAL,FOR * ,FOREIGN,FULL,GRANT,GROUP,GROUPING * ,IMPORT,IN,INNER,INSERT,INT,INTERSECT,INTO,IS * ,LATERAL,LEFT,LIKE,LOCAL,MINUS,NONE,NULL @@ -435,6 +435,17 @@ public void testSQL11ReservedKeyWords_FLOAT() { } } + @Test + public void testSQL11ReservedKeyWords_REAL() { + try { + parse("CREATE TABLE REAL (col STRING)"); + Assert.assertFalse("Expected ParseException", true); + } catch (ParseException ex) { + Assert.assertEquals("Failure didn't match.", + "line 1:13 cannot recognize input near 'REAL' '(' 'col' in table name", ex.getMessage()); + } + } + @Test public void testSQL11ReservedKeyWords_FOR() { try { diff --git ql/src/test/queries/clientpositive/external_jdbc_table.q ql/src/test/queries/clientpositive/external_jdbc_table.q new file mode 100644 index 0000000..13f4726 --- /dev/null +++ ql/src/test/queries/clientpositive/external_jdbc_table.q @@ -0,0 +1,186 @@ +--! 
qt:dataset:src + +set hive.strict.checks.cartesian.product= false; + + +CREATE TABLE simple_hive_table1 (ikey INT, bkey BIGINT, fkey FLOAT, dkey DOUBLE ); + +CREATE TEMPORARY FUNCTION dboutput AS 'org.apache.hadoop.hive.contrib.genericudf.example.GenericUDFDBOutput'; + + +FROM src + +SELECT + +dboutput ( 'jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','', +'CREATE TABLE SIMPLE_DERBY_TABLE1 ("ikey" INTEGER, "bkey" BIGINT, "fkey" REAL, "dkey" DOUBLE)' ), + +dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','', +'INSERT INTO SIMPLE_DERBY_TABLE1 ("ikey","bkey","fkey","dkey") VALUES (?,?,?,?)','20','20','20.0','20.0'), + +dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','', +'INSERT INTO SIMPLE_DERBY_TABLE1 ("ikey","bkey","fkey","dkey") VALUES (?,?,?,?)','-20','-20','-20.0','-20.0'), + +dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','', +'INSERT INTO SIMPLE_DERBY_TABLE1 ("ikey","bkey","fkey","dkey") VALUES (?,?,?,?)','100','-15','65.0','-74.0'), + +dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','', +'INSERT INTO SIMPLE_DERBY_TABLE1 ("ikey","bkey","fkey","dkey") VALUES (?,?,?,?)','44','53','-455.454','330.76') + +limit 1; + +FROM src + +SELECT + +dboutput ( 'jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','', +'CREATE TABLE SIMPLE_DERBY_TABLE2 ("ikey" INTEGER, "bkey" BIGINT, "fkey" REAL, "dkey" DOUBLE )' ), + +dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','', +'INSERT INTO SIMPLE_DERBY_TABLE2 ("ikey","bkey","fkey","dkey") VALUES (?,?,?,?)','20','20','20.0','20.0'), + +dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','', +'INSERT INTO SIMPLE_DERBY_TABLE2 ("ikey","bkey","fkey","dkey") VALUES (?,?,?,?)','-20','8','9.0','11.0'), + +dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','', +'INSERT INTO SIMPLE_DERBY_TABLE2 ("ikey","bkey","fkey","dkey") VALUES (?,?,?,?)','101','-16','66.0','-75.0'), + +dboutput('jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','', +'INSERT INTO SIMPLE_DERBY_TABLE2 ("ikey","bkey","fkey","dkey") VALUES (?,?,?,?)','40','50','-455.4543','330.767') + +limit 1; + + +CREATE EXTERNAL TABLE ext_simple_derby_table1 +( + ikey int, + bkey bigint, + fkey float, + dkey double +) +STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler' +TBLPROPERTIES ( + "hive.sql.database.type" = "DERBY", + "hive.sql.jdbc.driver" = "org.apache.derby.jdbc.EmbeddedDriver", + "hive.sql.jdbc.url" = "jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true;collation=TERRITORY_BASED:PRIMARY", + "hive.sql.dbcp.username" = "APP", + "hive.sql.dbcp.password" = "mine", + "hive.sql.table" = "SIMPLE_DERBY_TABLE1", + "hive.sql.dbcp.maxActive" = "1" +); + + +CREATE EXTERNAL TABLE ext_simple_derby_table2 +( + ikey int, + bkey bigint, + fkey float, + dkey double +) +STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler' +TBLPROPERTIES ( + "hive.sql.database.type" = "DERBY", + "hive.sql.jdbc.driver" = "org.apache.derby.jdbc.EmbeddedDriver", + "hive.sql.jdbc.url" = 
"jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true;collation=TERRITORY_BASED:PRIMARY", + "hive.sql.dbcp.username" = "APP", + "hive.sql.dbcp.password" = "mine", + "hive.sql.table" = "SIMPLE_DERBY_TABLE2", + "hive.sql.dbcp.maxActive" = "1" +); + + +select * from ext_simple_derby_table1; + +--Test projection +select dkey,fkey,bkey,ikey from ext_simple_derby_table1; +select bkey+ikey,fkey+dkey from ext_simple_derby_table1; +select abs(dkey),abs(ikey),abs(fkey),abs(bkey) from ext_simple_derby_table1; + + + +--Test aggregation +select count(*) from ext_simple_derby_table1; +select count(distinct bkey) from ext_simple_derby_table1; +select count(ikey), sum(bkey), avg(dkey), max(fkey) from ext_simple_derby_table1; + + +--Test sort +select dkey from ext_simple_derby_table1 order by dkey; +select SUM_IKEY,bkey from (select sum(-ikey) as SUM_IKEY, bkey from ext_simple_derby_table1 group by bkey) ttt order by bkey; + +--Test filter +explain select bkey from ext_simple_derby_table1 where 100 < ext_simple_derby_table1.ikey; +select bkey from ext_simple_derby_table1 where 100 < ext_simple_derby_table1.ikey; +select sum(bkey) from ext_simple_derby_table1 where ikey = 2450894 OR ikey = 2450911; +SELECT distinct dkey from ext_simple_derby_table1 where ikey = '100'; +SELECT count(*) FROM (select * from ext_simple_derby_table1) v WHERE ikey = 100; +SELECT count(*) from ext_simple_derby_table1 having count(*) > 0; +select sum(8),8 from ext_simple_derby_table1 where ikey = 1 group by 2; + + +--Test join +explain select ext_simple_derby_table1.fkey, ext_simple_derby_table2.dkey from ext_simple_derby_table1 join ext_simple_derby_table2 on +(ext_simple_derby_table1.ikey = ext_simple_derby_table2.ikey); + +select ext_simple_derby_table1.fkey, ext_simple_derby_table2.dkey from ext_simple_derby_table1 join ext_simple_derby_table2 on +(ext_simple_derby_table1.ikey = ext_simple_derby_table2.ikey); + + +explain select simple_hive_table1.fkey, ext_simple_derby_table2.dkey from simple_hive_table1 join ext_simple_derby_table2 on +(simple_hive_table1.ikey = ext_simple_derby_table2.ikey); + +select simple_hive_table1.fkey, ext_simple_derby_table2.dkey from simple_hive_table1 join ext_simple_derby_table2 on +(simple_hive_table1.ikey = ext_simple_derby_table2.ikey); + + +--Test union + +SELECT ikey FROM simple_hive_table1 +UNION +SELECT bkey FROM ext_simple_derby_table2; + + + + + + + + + + + + +----FAILURES---- + +--The following does not work due to invalid generated derby syntax: +--SELECT "dkey", COUNT("bkey") AS "$f1" FROM "SIMPLE_DERBY_TABLE1" GROUP BY "dkey" OFFSET 0 ROWS FETCH NEXT 10 ROWS ONLY {LIMIT 1} + +--SELECT dkey,count(bkey) from ext_simple_derby_table1 group by dkey limit 10; + + + + + +--Fails parse.CalcitePlanner: CBO failed, skipping CBO. 
+--select sum(fkey) from ext_simple_derby_table1 where bkey in (10, 100); + + + + +--Fails to ClassCastException +-- + + + + +--SELECT ikey FROM ext_simple_derby_table1 +--UNION +--SELECT bkey FROM ext_simple_derby_table2; + + + + + + + +--select dkey from ext_simple_derby_table1 order by dkey limit 10 offset 60; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/jdbc_handler.q ql/src/test/queries/clientpositive/jdbc_handler.q index 4d7effd..d086735 100644 --- ql/src/test/queries/clientpositive/jdbc_handler.q +++ ql/src/test/queries/clientpositive/jdbc_handler.q @@ -21,7 +21,7 @@ limit 1; CREATE EXTERNAL TABLE ext_simple_derby_table ( - kkey bigint + kkey int ) STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler' TBLPROPERTIES ( diff --git ql/src/test/queries/clientpositive/real.q ql/src/test/queries/clientpositive/real.q new file mode 100644 index 0000000..bdc5ff8 --- /dev/null +++ ql/src/test/queries/clientpositive/real.q @@ -0,0 +1,2 @@ +create table realtype (a real, b real); +describe realtype; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/reopt_dpp.q ql/src/test/queries/clientpositive/reopt_dpp.q new file mode 100644 index 0000000..952dcbe --- /dev/null +++ ql/src/test/queries/clientpositive/reopt_dpp.q @@ -0,0 +1,62 @@ +set hive.explain.user=true; +set hive.optimize.index.filter=true; +set hive.auto.convert.join=true; +set hive.vectorized.execution.enabled=true; + +drop table if exists x1_store_sales; +drop table if exists x1_date_dim; +drop table if exists x1_item; + +create table x1_store_sales +( + ss_item_sk int +) +partitioned by (ss_sold_date_sk int) +stored as orc; + +create table x1_date_dim +( + d_date_sk int, + d_month_seq int, + d_year int, + d_moy int +) +stored as orc; + + +insert into x1_date_dim values (1,1,2000,2), + (2,2,2001,2); +insert into x1_store_sales partition (ss_sold_date_sk=1) values (1); +insert into x1_store_sales partition (ss_sold_date_sk=2) values (2); + +alter table x1_store_sales partition (ss_sold_date_sk=1) update statistics set( +'numRows'='123456', +'rawDataSize'='1234567'); + +alter table x1_date_dim update statistics set( +'numRows'='56', +'rawDataSize'='81449'); + + +-- the following query is designed to produce a DPP plan +explain +select count(*) cnt + from + x1_store_sales s + ,x1_date_dim d + where + 1=1 + and s.ss_sold_date_sk = d.d_date_sk + and d.d_year=2000; + +-- tablescan of s should be 2 or 123456 rows; but never 1 +-- and it should not be a mapjoin :) +explain reoptimization +select count(*) cnt + from + x1_store_sales s + ,x1_date_dim d + where + 1=1 + and s.ss_sold_date_sk = d.d_date_sk + and d.d_year=2000; diff --git ql/src/test/queries/clientpositive/reopt_semijoin.q ql/src/test/queries/clientpositive/reopt_semijoin.q new file mode 100644 index 0000000..0eacb8a --- /dev/null +++ ql/src/test/queries/clientpositive/reopt_semijoin.q @@ -0,0 +1,76 @@ +set hive.explain.user=true; +set hive.optimize.index.filter=true; +set hive.auto.convert.join=true; +set hive.vectorized.execution.enabled=true; + +drop table if exists x1_store_sales; +drop table if exists x1_date_dim; + +create table x1_store_sales +( + ss_sold_date_sk int, + ss_item_sk int +) +stored as orc; + +create table x1_date_dim +( + d_date_sk int, + d_month_seq int, + d_year int, + d_moy int +) +stored as orc; + +insert into x1_date_dim values (1,1,2000,1), + (2,2,2001,2), + (3,2,2001,3), + (4,2,2001,4), + (5,2,2001,5), + (6,2,2001,6), + (7,2,2001,7), + (8,2,2001,8); + +insert into x1_store_sales values 
(1,1),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10),(11,11); + +alter table x1_store_sales update statistics set( +'numRows'='123456', +'rawDataSize'='1234567'); + +alter table x1_date_dim update statistics set( +'numRows'='56', +'rawDataSize'='81449'); + + +set hive.auto.convert.join.noconditionaltask.size=1; +set hive.tez.dynamic.partition.pruning=true; +set hive.tez.dynamic.semijoin.reduction=true; +set hive.optimize.index.filter=true; +set hive.tez.bigtable.minsize.semijoin.reduction=1; +set hive.tez.min.bloom.filter.entries=1; +set hive.tez.bloom.filter.factor=1.0f; +set hive.explain.user=false; + +explain +select sum(s.ss_item_sk) + from + x1_store_sales s + ,x1_date_dim d + where + 1=1 + and s.ss_sold_date_sk=d.d_date_sk + and d.d_moy=3 +; + +explain reoptimization +select sum(s.ss_item_sk) + from + x1_store_sales s + ,x1_date_dim d + where + 1=1 + and s.ss_sold_date_sk=d.d_date_sk + and d.d_moy=3 +; + + diff --git ql/src/test/queries/clientpositive/results_cache_truncate.q ql/src/test/queries/clientpositive/results_cache_truncate.q new file mode 100644 index 0000000..f806d49 --- /dev/null +++ ql/src/test/queries/clientpositive/results_cache_truncate.q @@ -0,0 +1,61 @@ +--! qt:dataset:src + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +create table rct1_1 (key string, value string) stored as orc tblproperties ('transactional'='true'); + +insert into rct1_1 select * from default.src; + +set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=false; + +explain +select count(*) from rct1_1; +select count(*) from rct1_1; + +set test.comment="Query on transactional table should use cache"; +set test.comment; +explain +select count(*) from rct1_1; +select count(*) from rct1_1; + +truncate table rct1_1; + +set test.comment="Table truncated - query cache invalidated"; +set test.comment; +explain +select count(*) from rct1_1; +select count(*) from rct1_1; + +create table rct1_2 (key string, value string) partitioned by (p1 string) stored as orc tblproperties ('transactional'='true'); + +insert into rct1_2 partition (p1='part1') select * from default.src; +insert into rct1_2 partition (p1='part2') select * from default.src; + +explain +select count(*) from rct1_2; +select count(*) from rct1_2; + +set test.comment="Query on transactional table should use cache"; +set test.comment; +explain +select count(*) from rct1_2; +select count(*) from rct1_2; + +truncate table rct1_2 partition (p1='part1'); + +set test.comment="Partition truncated - query cache invalidated"; +set test.comment; +explain +select count(*) from rct1_2; +select count(*) from rct1_2; + +truncate table rct1_2; + +set test.comment="Table truncated - query cache invalidated"; +set test.comment; +explain +select count(*) from rct1_2; +select count(*) from rct1_2; + diff --git ql/src/test/queries/clientpositive/vector_delete_orig_table.q ql/src/test/queries/clientpositive/vector_delete_orig_table.q index f914408..48ef5e2 100644 --- ql/
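results_cache_truncate.q checks that a cached result for a transactional table stops being served once TRUNCATE advances the table's write state. The toy model below illustrates that invalidation rule only; the class and method names are illustrative, and keying entries on a single per-table write id is a deliberate simplification of Hive's write-id based validation.

import java.util.HashMap;
import java.util.Map;

public class ResultsCacheModel {
  /** Cached entry remembers the table write id it was computed against. */
  private static final class Entry {
    final long tableWriteId;
    final long result;
    Entry(long tableWriteId, long result) { this.tableWriteId = tableWriteId; this.result = result; }
  }

  private final Map<String, Entry> cache = new HashMap<>();
  private final Map<String, Long> currentWriteId = new HashMap<>();

  long runCountStar(String table, long actualCount) {
    long writeId = currentWriteId.getOrDefault(table, 0L);
    Entry e = cache.get("select count(*) from " + table);
    if (e != null && e.tableWriteId == writeId) {
      System.out.println("cache hit for " + table);
      return e.result;
    }
    System.out.println("cache miss for " + table);
    cache.put("select count(*) from " + table, new Entry(writeId, actualCount));
    return actualCount;
  }

  /** Any write (INSERT, TRUNCATE, ...) bumps the write id, so older entries stop matching. */
  void truncate(String table) {
    currentWriteId.merge(table, 1L, Long::sum);
  }

  public static void main(String[] args) {
    ResultsCacheModel m = new ResultsCacheModel();
    m.runCountStar("rct1_1", 500);   // miss, result cached
    m.runCountStar("rct1_1", 500);   // hit
    m.truncate("rct1_1");
    m.runCountStar("rct1_1", 0);     // miss again: the truncate invalidated the entry
  }
}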