diff --git common/src/java/org/apache/hadoop/hive/common/CompressionUtils.java common/src/java/org/apache/hadoop/hive/common/CompressionUtils.java index d98632ef87..681c506b1b 100644 --- common/src/java/org/apache/hadoop/hive/common/CompressionUtils.java +++ common/src/java/org/apache/hadoop/hive/common/CompressionUtils.java @@ -159,10 +159,6 @@ public static void zip(String parentDir, String[] inputFiles, String outputFile) TarArchiveEntry entry = null; while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) { final File outputFile = new File(outputDir, entry.getName()); - if (!outputFile.toPath().toAbsolutePath().normalize() - .startsWith(outputDir.toPath().toAbsolutePath().normalize())) { - throw new IOException("Untarred file is not under the output directory"); - } if (entry.isDirectory()) { if (flatten) { // no sub-directories diff --git common/src/java/org/apache/hadoop/hive/common/JavaUtils.java common/src/java/org/apache/hadoop/hive/common/JavaUtils.java index c011cd1626..e09dec1de5 100644 --- common/src/java/org/apache/hadoop/hive/common/JavaUtils.java +++ common/src/java/org/apache/hadoop/hive/common/JavaUtils.java @@ -71,10 +71,6 @@ public static ClassLoader getClassLoader() { return classLoader; } - public static Class loadClass(String shadePrefix, String className) throws ClassNotFoundException { - return loadClass(shadePrefix + "." + className); - } - public static Class loadClass(String className) throws ClassNotFoundException { return loadClass(className, true); } diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 0a997a1569..cc490afc77 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1911,9 +1911,6 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "Hive streaming ingest has auto flush mechanism to flush all open record updaters under memory pressure.\n" + "When memory usage exceed hive.heap.memory.monitor.default.usage.threshold, the auto-flush mechanism will \n" + "wait until this size (default 100Mb) of records are ingested before triggering flush."), - HIVE_CLASSLOADER_SHADE_PREFIX("hive.classloader.shade.prefix", "", "During reflective instantiation of a class\n" + - "(input, output formats, serde etc.), when classloader throws ClassNotFoundException, as a fallback this\n" + - "shade prefix will be used before class reference and retried."), HIVE_ORC_MS_FOOTER_CACHE_ENABLED("hive.orc.splits.ms.footer.cache.enabled", false, "Whether to enable using file metadata cache in metastore for ORC file footers."), @@ -2623,11 +2620,6 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "Set to true to ensure that each SQL Merge statement ensures that for each row in the target\n" + "table there is at most 1 matching row in the source table per SQL Specification."), - // For Arrow SerDe - HIVE_ARROW_ROOT_ALLOCATOR_LIMIT("hive.arrow.root.allocator.limit", Long.MAX_VALUE, - "Arrow root allocator memory size limitation in bytes."), - HIVE_ARROW_BATCH_SIZE("hive.arrow.batch.size", 1000, "The number of rows sent in one Arrow batch."), - // For Druid storage handler HIVE_DRUID_INDEXING_GRANULARITY("hive.druid.indexer.segments.granularity", "DAY", new PatternSet("YEAR", "MONTH", "WEEK", "DAY", "HOUR", "MINUTE", "SECOND"), @@ -2690,6 +2682,9 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "Wait time in ms default to 30 seconds." 
), HIVE_DRUID_BITMAP_FACTORY_TYPE("hive.druid.bitmap.type", "roaring", new PatternSet("roaring", "concise"), "Coding algorithm use to encode the bitmaps"), + HIVE_DRUID_APPROX_RESULT("hive.druid.approx.result", false, + "Whether to allow approximate results from druid. \n" + + "When set to true decimals will be stored as double and druid is allowed to return approximate results for decimal columns."), // For HBase storage handler HIVE_HBASE_WAL_ENABLED("hive.hbase.wal.enabled", true, "Whether writes to HBase should be forced to the write-ahead log. \n" + diff --git dev-support/jenkins-common.sh dev-support/jenkins-common.sh index 0467d119cc..64f486fc2b 100644 --- dev-support/jenkins-common.sh +++ dev-support/jenkins-common.sh @@ -15,6 +15,8 @@ # limitations under the License. JIRA_ROOT_URL="https://issues.apache.org" +JENKINS_URL="https://builds.apache.org" +JENKINS_QUEUE_QUERY="/queue/api/json?tree=items[task[name],inQueueSince,actions[parameters[name,value]],why]" fail() { echo "$@" 1>&2 diff --git dev-support/jenkins-execute-build.sh dev-support/jenkins-execute-build.sh index f660fcb0a3..35392dd565 100644 --- dev-support/jenkins-execute-build.sh +++ dev-support/jenkins-execute-build.sh @@ -51,7 +51,8 @@ call_ptest_server() { local PTEST_CLASSPATH="$PTEST_BUILD_DIR/hive/testutils/ptest2/target/hive-ptest-3.0-classes.jar:$PTEST_BUILD_DIR/hive/testutils/ptest2/target/lib/*" java -cp "$PTEST_CLASSPATH" org.apache.hive.ptest.api.client.PTestClient --command testStart \ - --outputDir "$PTEST_BUILD_DIR/hive/testutils/ptest2/target" --password "$JIRA_PASSWORD" "$@" + --outputDir "$PTEST_BUILD_DIR/hive/testutils/ptest2/target" --password "$JIRA_PASSWORD" \ + --jenkinsQueueUrl "$JENKINS_URL$JENKINS_QUEUE_QUERY" "$@" } # Unpack all test results diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java index 3e707e3415..fc5a5fa062 100644 --- druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java +++ druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java @@ -81,9 +81,11 @@ import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.EnvironmentContext; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.LockType; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; +import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; import org.apache.hadoop.hive.ql.metadata.StorageHandlerInfo; import org.apache.hadoop.hive.ql.plan.TableDesc; @@ -98,7 +100,6 @@ import org.apache.hadoop.mapred.OutputFormat; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.common.util.ShutdownHookManager; - import org.jboss.netty.handler.codec.http.HttpMethod; import org.jboss.netty.handler.codec.http.HttpResponseStatus; import org.joda.time.DateTime; @@ -888,6 +889,14 @@ public Configuration getConf() { return conf; } + @Override public LockType getLockType(WriteEntity writeEntity + ) { + if (writeEntity.getWriteType().equals(WriteEntity.WriteType.INSERT)) { + return LockType.SHARED_READ; + } + return LockType.SHARED_WRITE; + } + @Override public String toString() { return Constants.DRUID_HIVE_STORAGE_HANDLER_ID; @@ -918,55 +927,50 @@ private MetadataStorageTablesConfig 
getDruidMetadataStorageTablesConfig() { } private SQLMetadataConnector getConnector() { + return Suppliers.memoize(this::buildConnector).get(); + } + + private SQLMetadataConnector buildConnector() { + if (connector != null) { return connector; } - final String dbType = HiveConf - .getVar(getConf(), HiveConf.ConfVars.DRUID_METADATA_DB_TYPE); - final String username = HiveConf - .getVar(getConf(), HiveConf.ConfVars.DRUID_METADATA_DB_USERNAME); - final String password = HiveConf - .getVar(getConf(), HiveConf.ConfVars.DRUID_METADATA_DB_PASSWORD); - final String uri = HiveConf - .getVar(getConf(), HiveConf.ConfVars.DRUID_METADATA_DB_URI); - - - final Supplier storageConnectorConfigSupplier = Suppliers.ofInstance( - new MetadataStorageConnectorConfig() { - @Override - public String getConnectURI() { - return uri; - } - - @Override - public String getUser() { - return Strings.emptyToNull(username); - } - - @Override - public String getPassword() { - return Strings.emptyToNull(password); - } - }); + final String dbType = HiveConf.getVar(getConf(), HiveConf.ConfVars.DRUID_METADATA_DB_TYPE); + final String username = HiveConf.getVar(getConf(), HiveConf.ConfVars.DRUID_METADATA_DB_USERNAME); + final String password = HiveConf.getVar(getConf(), HiveConf.ConfVars.DRUID_METADATA_DB_PASSWORD); + final String uri = HiveConf.getVar(getConf(), HiveConf.ConfVars.DRUID_METADATA_DB_URI); + LOG.debug("Supplying SQL Connector with DB type {}, URI {}, User {}", dbType, uri, username); + final Supplier storageConnectorConfigSupplier = + Suppliers.ofInstance(new MetadataStorageConnectorConfig() { + @Override public String getConnectURI() { + return uri; + } + + @Override public String getUser() { + return Strings.emptyToNull(username); + } + + @Override public String getPassword() { + return Strings.emptyToNull(password); + } + }); if (dbType.equals("mysql")) { connector = new MySQLConnector(storageConnectorConfigSupplier, - Suppliers.ofInstance(getDruidMetadataStorageTablesConfig()) - , new MySQLConnectorConfig()); + Suppliers.ofInstance(getDruidMetadataStorageTablesConfig()), new MySQLConnectorConfig() + ); } else if (dbType.equals("postgresql")) { connector = new PostgreSQLConnector(storageConnectorConfigSupplier, - Suppliers.ofInstance(getDruidMetadataStorageTablesConfig()) + Suppliers.ofInstance(getDruidMetadataStorageTablesConfig()) ); } else if (dbType.equals("derby")) { connector = new DerbyConnector(new DerbyMetadataStorage(storageConnectorConfigSupplier.get()), - storageConnectorConfigSupplier, Suppliers.ofInstance(getDruidMetadataStorageTablesConfig()) + storageConnectorConfigSupplier, Suppliers.ofInstance(getDruidMetadataStorageTablesConfig()) ); - } - else { + } else { throw new IllegalStateException(String.format("Unknown metadata storage type [%s]", dbType)); } - return connector; } diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java index 076f00af37..93d3e5ce3b 100644 --- druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java +++ druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java @@ -817,6 +817,8 @@ public static IndexSpec getIndexSpec(Configuration jc) { // Default, all columns that are not metrics or timestamp, are treated as dimensions final List dimensions = new ArrayList<>(); ImmutableList.Builder aggregatorFactoryBuilder = ImmutableList.builder(); + final boolean approximationAllowed = HiveConf + .getBoolVar(jc, 
HiveConf.ConfVars.HIVE_DRUID_APPROX_RESULT); for (int i = 0; i < columnTypes.size(); i++) { final PrimitiveObjectInspector.PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) columnTypes .get(i)).getPrimitiveCategory(); @@ -833,10 +835,15 @@ public static IndexSpec getIndexSpec(Configuration jc) { af = new DoubleSumAggregatorFactory(columnNames.get(i), columnNames.get(i)); break; case DECIMAL: - throw new UnsupportedOperationException( - String.format("Druid does not support decimal column type cast column " - + "[%s] to double", columnNames.get(i))); - + if (approximationAllowed) { + af = new DoubleSumAggregatorFactory(columnNames.get(i), columnNames.get(i)); + } else { + throw new UnsupportedOperationException( + String.format("Druid does not support decimal column type." + + "Either cast column [%s] to double or Enable Approximate Result for Druid by setting property [%s] to true", + columnNames.get(i), HiveConf.ConfVars.HIVE_DRUID_APPROX_RESULT.varname)); + } + break; case TIMESTAMP: // Granularity column String tColumnName = columnNames.get(i); diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java index 5f7657975a..d991adb088 100644 --- druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java +++ druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java @@ -17,17 +17,26 @@ */ package org.apache.hadoop.hive.druid.serde; -import com.fasterxml.jackson.core.type.TypeReference; -import com.google.common.base.Function; +import java.io.IOException; +import java.io.InputStream; +import java.sql.Timestamp; +import java.time.Instant; +import java.time.ZonedDateTime; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; +import java.util.stream.Collectors; + import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import io.druid.query.Druids; -import io.druid.query.Druids.SegmentMetadataQueryBuilder; -import io.druid.query.metadata.metadata.ColumnAnalysis; -import io.druid.query.metadata.metadata.SegmentAnalysis; -import io.druid.query.metadata.metadata.SegmentMetadataQuery; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.TimestampTZ; import org.apache.hadoop.hive.conf.Constants; @@ -44,6 +53,7 @@ import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable; @@ -57,6 +67,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; @@ -81,21 +92,15 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.io.InputStream; -import java.sql.Timestamp; -import java.time.Instant; -import java.time.ZonedDateTime; -import java.time.format.DateTimeFormatter; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Properties; -import java.util.stream.Collectors; +import com.fasterxml.jackson.core.type.TypeReference; +import com.google.common.base.Function; +import com.google.common.collect.Lists; + +import io.druid.query.Druids; +import io.druid.query.Druids.SegmentMetadataQueryBuilder; +import io.druid.query.metadata.metadata.ColumnAnalysis; +import io.druid.query.metadata.metadata.SegmentAnalysis; +import io.druid.query.metadata.metadata.SegmentMetadataQuery; /** * DruidSerDe that is used to deserialize objects from a Druid data source. @@ -345,6 +350,10 @@ public Writable serialize(Object o, ObjectInspector objectInspector) throws SerD res = ((DoubleObjectInspector) fields.get(i).getFieldObjectInspector()) .get(values.get(i)); break; + case DECIMAL: + res = ((HiveDecimalObjectInspector) fields.get(i).getFieldObjectInspector()) + .getPrimitiveJavaObject(values.get(i)).doubleValue(); + break; case CHAR: res = ((HiveCharObjectInspector) fields.get(i).getFieldObjectInspector()) .getPrimitiveJavaObject(values.get(i)).getValue(); @@ -362,7 +371,7 @@ public Writable serialize(Object o, ObjectInspector objectInspector) throws SerD .get(values.get(i)); break; default: - throw new SerDeException("Unsupported type: " + types[i].getPrimitiveCategory()); + throw new SerDeException("Unknown type: " + types[i].getPrimitiveCategory()); } value.put(columns[i], res); } @@ -443,6 +452,9 @@ public Object deserialize(Writable writable) throws SerDeException { case DOUBLE: output.add(new DoubleWritable(((Number) value).doubleValue())); break; + case DECIMAL: + output.add(new HiveDecimalWritable(HiveDecimal.create(((Number) value).doubleValue()))); + break; case CHAR: output.add( new HiveCharWritable( diff --git druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java index e45de0f93f..e4fa1a2e59 100644 --- druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java +++ druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java @@ -764,8 +764,8 @@ private void deserializeQueryResults(DruidSerDe serDe, String queryType, String } - private static final String COLUMN_NAMES = "__time,c0,c1,c2,c3,c4,c5,c6,c7,c8"; - private static final String COLUMN_TYPES = "timestamp with local time zone,string,char(6),varchar(8),double,float,bigint,int,smallint,tinyint"; + private static final String COLUMN_NAMES = "__time,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9"; + private static final String COLUMN_TYPES = "timestamp with local time zone,string,char(6),varchar(8),double,float,decimal(38,18),bigint,int,smallint,tinyint"; private static final Object[] ROW_OBJECT = new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1377907200000L).atZone(ZoneOffset.UTC))), new 
Text("dim1_val"), @@ -773,6 +773,7 @@ private void deserializeQueryResults(DruidSerDe serDe, String queryType, String new HiveVarcharWritable(new HiveVarchar("dim3_val", 8)), new DoubleWritable(10669.3D), new FloatWritable(10669.45F), + new HiveDecimalWritable(HiveDecimal.create(1064.34D)), new LongWritable(1113939), new IntWritable(1112123), new ShortWritable((short) 12), @@ -787,10 +788,11 @@ private void deserializeQueryResults(DruidSerDe serDe, String queryType, String .put("c2", "dim3_val") .put("c3", 10669.3D) .put("c4", 10669.45F) - .put("c5", 1113939L) - .put("c6", 1112123) - .put("c7", (short) 12) - .put("c8", (byte) 0) + .put("c5", 1064.34D) + .put("c6", 1113939L) + .put("c7", 1112123) + .put("c8", (short) 12) + .put("c9", (byte) 0) .put("__time_granularity", 1377907200000L) .build()); @@ -875,6 +877,7 @@ public ObjectInspector apply(PrimitiveTypeInfo type) { new HiveVarcharWritable(new HiveVarchar("dim3_val", 8)), new DoubleWritable(10669.3D), new FloatWritable(10669.45F), + new HiveDecimalWritable(HiveDecimal.create(1064.34D)), new LongWritable(1113939), new IntWritable(1112123), new ShortWritable((short) 12), @@ -888,10 +891,11 @@ public ObjectInspector apply(PrimitiveTypeInfo type) { .put("c2", "dim3_val") .put("c3", 10669.3D) .put("c4", 10669.45F) - .put("c5", 1113939L) - .put("c6", 1112123) - .put("c7", (short) 12) - .put("c8", (byte) 0) + .put("c5", 1064.34D) + .put("c6", 1113939L) + .put("c7", 1112123) + .put("c8", (short) 12) + .put("c9", (byte) 0) .build()); @Test diff --git itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/mapreduce/TestSequenceFileReadWrite.java itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/mapreduce/TestSequenceFileReadWrite.java index 4ebec1bf1f..ffd714688f 100644 --- itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/mapreduce/TestSequenceFileReadWrite.java +++ itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/mapreduce/TestSequenceFileReadWrite.java @@ -25,7 +25,6 @@ import java.io.File; import java.io.IOException; import java.util.ArrayList; -import org.junit.Ignore; import java.util.Iterator; import org.apache.commons.io.FileUtils; @@ -161,7 +160,6 @@ public void testTextTableWriteRead() throws Exception { assertEquals(input.length, numTuplesRead); } - @Ignore("Disabling this test. Check HIVE-19506 for more details") @Test public void testSequenceTableWriteReadMR() throws Exception { String createTable = "CREATE TABLE demo_table_2(a0 int, a1 String, a2 String) STORED AS SEQUENCEFILE"; @@ -208,7 +206,6 @@ public void testSequenceTableWriteReadMR() throws Exception { assertEquals(input.length, numTuplesRead); } - @Ignore("Disabling this test. Check HIVE-19506 for more details") @Test public void testTextTableWriteReadMR() throws Exception { String createTable = "CREATE TABLE demo_table_3(a0 int, a1 String, a2 String) STORED AS TEXTFILE"; diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestAcidOnTez.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestAcidOnTez.java index 0af91bda95..ac28e43340 100644 --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestAcidOnTez.java +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestAcidOnTez.java @@ -185,7 +185,6 @@ public void testMapJoinOnTez() throws Exception { * Tests non acid to acid conversion where starting table has non-standard layout, i.e. 
* where "original" files are not immediate children of the partition dir */ - @Ignore("HIVE-19509: Disable tests that are failing continuously") @Test public void testNonStandardConversion01() throws Exception { HiveConf confForTez = new HiveConf(hiveConf); // make a clone of existing hive conf @@ -432,7 +431,6 @@ public void testNonStandardConversion02() throws Exception { * {@link org.apache.hadoop.hive.ql.metadata.Hive#moveAcidFiles(FileSystem, FileStatus[], Path, List)} drops the union subdirs * since each delta file has a unique name. */ - @Ignore("HIVE-19509: Disable tests that are failing continuously") @Test public void testCtasTezUnion() throws Exception { HiveConf confForTez = new HiveConf(hiveConf); // make a clone of existing hive conf diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMTQueries.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMTQueries.java index 6ed872db6c..4838856062 100644 --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMTQueries.java +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMTQueries.java @@ -42,7 +42,7 @@ public void testMTQueries1() throws Exception { // derby fails creating multiple stats aggregator concurrently util.getConf().setBoolean("hive.exec.submitviachild", true); util.getConf().setBoolean("hive.exec.submit.local.task.via.child", true); - util.getConf().setBoolean("hive.vectorized.execution.enabled", true); + util.getConf().setBoolean("hive.vectorized.execution.enabled", false); util.getConf().set("hive.stats.dbclass", "fs"); util.getConf().set("hive.mapred.mode", "nonstrict"); util.getConf().set("hive.stats.column.autogather", "false"); diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/metadata/TestAlterTableMetadata.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/metadata/TestAlterTableMetadata.java deleted file mode 100644 index 940a1d370d..0000000000 --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/metadata/TestAlterTableMetadata.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.metadata; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.PrincipalType; -import org.apache.hadoop.hive.ql.DriverFactory; -import org.apache.hadoop.hive.ql.IDriver; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.junit.Test; - -import static org.junit.Assert.assertEquals; - -public class TestAlterTableMetadata { - @Test - public void testAlterTableOwner() throws HiveException { - /* - * This test verifies that the ALTER TABLE ... 
SET OWNER command will change the - * owner metadata of the table in HMS. - */ - - HiveConf conf = new HiveConf(this.getClass()); - conf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - SessionState.start(conf); - IDriver driver = DriverFactory.newDriver(conf); - CommandProcessorResponse resp; - Table table; - - resp = driver.run("create table t1(id int)"); - assertEquals(0, resp.getResponseCode()); - - // Changes the owner to a user and verify the change - resp = driver.run("alter table t1 set owner user u1"); - assertEquals(0, resp.getResponseCode()); - - table = Hive.get(conf).getTable("t1"); - assertEquals(PrincipalType.USER, table.getOwnerType()); - assertEquals("u1", table.getOwner()); - - // Changes the owner to a group and verify the change - resp = driver.run("alter table t1 set owner group g1"); - assertEquals(0, resp.getResponseCode()); - - table = Hive.get(conf).getTable("t1"); - assertEquals(PrincipalType.GROUP, table.getOwnerType()); - assertEquals("g1", table.getOwner()); - - // Changes the owner to a role and verify the change - resp = driver.run("alter table t1 set owner role r1"); - assertEquals(0, resp.getResponseCode()); - - table = Hive.get(conf).getTable("t1"); - assertEquals(PrincipalType.ROLE, table.getOwnerType()); - assertEquals("r1", table.getOwner()); - } -} diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOnHDFSEncryptedZones.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOnHDFSEncryptedZones.java index 73102a7dd3..fd05e99137 100644 --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOnHDFSEncryptedZones.java +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOnHDFSEncryptedZones.java @@ -94,8 +94,6 @@ public void targetAndSourceHaveDifferentEncryptionZoneKeys() throws Throwable { new HashMap() {{ put(HiveConf.ConfVars.HIVE_IN_TEST.varname, "false"); put(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS.varname, "false"); - put(HiveConf.ConfVars.HIVE_DISTCP_DOAS_USER.varname, - UserGroupInformation.getCurrentUser().getUserName()); }}, "test_key123"); WarehouseInstance.Tuple tuple = @@ -107,8 +105,7 @@ public void targetAndSourceHaveDifferentEncryptionZoneKeys() throws Throwable { replica .run("repl load " + replicatedDbName + " from '" + tuple.dumpLocation - + "' with('hive.repl.add.raw.reserved.namespace'='true', " - + "'distcp.options.pugpbx'='', 'distcp.options.skipcrccheck'='')") + + "' with('hive.repl.add.raw.reserved.namespace'='true')") .run("use " + replicatedDbName) .run("repl status " + replicatedDbName) .verifyResult(tuple.lastReplicationId) diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java index df9bde059e..70e1aa7f3a 100644 --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java @@ -675,62 +675,4 @@ public void testIncrementalReplWithEventsBatchHavingDropCreateTable() throws Thr .run("select id from table2 order by id") .verifyResults(new String[] {"2"}); } - - @Test - public void testIncrementalReplWithDropAndCreateTableDifferentPartitionTypeAndInsert() throws Throwable { - // Bootstrap dump with empty db - WarehouseInstance.Tuple bootstrapTuple = 
primary.dump(primaryDbName, null); - - // Bootstrap load in replica - replica.load(replicatedDbName, bootstrapTuple.dumpLocation) - .status(replicatedDbName) - .verifyResult(bootstrapTuple.lastReplicationId); - - // First incremental dump - WarehouseInstance.Tuple firstIncremental = primary.run("use " + primaryDbName) - .run("create table table1 (id int) partitioned by (country string)") - .run("create table table2 (id int)") - .run("create table table3 (id int) partitioned by (country string)") - .run("insert into table1 partition(country='india') values(1)") - .run("insert into table2 values(2)") - .run("insert into table3 partition(country='india') values(3)") - .dump(primaryDbName, bootstrapTuple.lastReplicationId); - - // Second incremental dump - WarehouseInstance.Tuple secondIncremental = primary.run("use " + primaryDbName) - .run("drop table table1") - .run("drop table table2") - .run("drop table table3") - .run("create table table1 (id int)") - .run("insert into table1 values (10)") - .run("create table table2 (id int) partitioned by (country string)") - .run("insert into table2 partition(country='india') values(20)") - .run("create table table3 (id int) partitioned by (name string, rank int)") - .run("insert into table3 partition(name='adam', rank=100) values(30)") - .dump(primaryDbName, firstIncremental.lastReplicationId); - - // First incremental load - replica.load(replicatedDbName, firstIncremental.dumpLocation) - .status(replicatedDbName) - .verifyResult(firstIncremental.lastReplicationId) - .run("use " + replicatedDbName) - .run("select id from table1") - .verifyResults(new String[] {"1"}) - .run("select * from table2") - .verifyResults(new String[] {"2"}) - .run("select id from table3") - .verifyResults(new String[] {"3"}); - - // Second incremental load - replica.load(replicatedDbName, secondIncremental.dumpLocation) - .status(replicatedDbName) - .verifyResult(secondIncremental.lastReplicationId) - .run("use " + replicatedDbName) - .run("select * from table1") - .verifyResults(new String[] {"10"}) - .run("select id from table2") - .verifyResults(new String[] {"20"}) - .run("select id from table3") - .verifyResults(new String[] {"30"}); - } } diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java index dc31e922be..f95e1c62fe 100644 --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java @@ -123,7 +123,6 @@ private void initialize(String cmRoot, String warehouseRoot, hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false); if (!hiveConf.getVar(HiveConf.ConfVars.HIVE_TXN_MANAGER).equals("org.apache.hadoop.hive.ql.lockmgr.DbTxnManager")) { hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); } diff --git itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java index 51e491c9ca..55c6c23da6 100644 --- itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java +++ itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java @@ -55,7 +55,6 @@ import org.junit.AfterClass; 
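Note on the Druid DECIMAL hunks further up (DruidStorageHandlerUtils and DruidSerDe): the new hive.druid.approx.result flag gates whether DECIMAL columns are accepted at all, and when it is enabled the value is narrowed to double on write and re-wrapped as a decimal on read. A minimal round-trip sketch using only types already imported in those hunks; the literal value is made up for illustration and this is not code from the patch:

    import org.apache.hadoop.hive.common.type.HiveDecimal;
    import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;

    public class DecimalRoundTripSketch {
      public static void main(String[] args) {
        // Serialize path: the HiveDecimal is narrowed to double, which is what
        // Druid stores for the column (via DoubleSumAggregatorFactory).
        HiveDecimal original = HiveDecimal.create("1064.34");
        double storedInDruid = original.doubleValue();

        // Deserialize path: the double coming back from Druid is wrapped into a
        // HiveDecimalWritable, so precision beyond double is not recoverable.
        HiveDecimalWritable readBack =
            new HiveDecimalWritable(HiveDecimal.create(storedInDruid));
        System.out.println(readBack);
      }
    }

This loss of precision is why the config description warns that results for decimal columns become approximate once the flag is set.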
import org.junit.Assert; import org.junit.BeforeClass; -import org.junit.Ignore; import org.junit.Test; /** @@ -769,7 +768,6 @@ public void testEmbeddedBeelineConnection() throws Throwable{ * Test Beeline could show the query progress for time-consuming query. * @throws Throwable */ - @Ignore("HIVE-19509: Disable tests that are failing continuously") @Test public void testQueryProgress() throws Throwable { final String SCRIPT_TEXT = @@ -797,7 +795,6 @@ public void testQueryProgress() throws Throwable { * * @throws Throwable */ - @Ignore("HIVE-19509: Disable tests that are failing continuously") @Test public void testQueryProgressParallel() throws Throwable { final String SCRIPT_TEXT = "set hive.support.concurrency = false;\n" + diff --git itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestSSL.java itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestSSL.java index 714fba23c6..8aefef8a97 100644 --- itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestSSL.java +++ itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestSSL.java @@ -350,7 +350,6 @@ public void testSSLFetch() throws Exception { * Start HS2 in Http mode with SSL enabled, open a SSL connection and fetch data * @throws Exception */ - @Ignore("HIVE-19509: Disable tests that are failing continuously") @Test public void testSSLFetchHttp() throws Exception { SSLTestUtils.setSslConfOverlay(confOverlay); diff --git itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestMiniDruidKafkaCliDriver.java itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestMiniDruidKafkaCliDriver.java index e2d26ab726..4768975225 100644 --- itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestMiniDruidKafkaCliDriver.java +++ itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestMiniDruidKafkaCliDriver.java @@ -22,7 +22,6 @@ import org.junit.ClassRule; import org.junit.Rule; -import org.junit.Ignore; import org.junit.Test; import org.junit.rules.TestRule; import org.junit.runner.RunWith; @@ -32,7 +31,6 @@ import java.io.File; import java.util.List; -@Ignore("HIVE-19509: Disable tests that are failing continuously") @RunWith(Parameterized.class) public class TestMiniDruidKafkaCliDriver { diff --git itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java index cf8cea7fec..6628336807 100644 --- itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java +++ itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java @@ -58,8 +58,6 @@ public CliConfig() { excludesFrom(testConfigProps, "druid.query.files"); excludesFrom(testConfigProps, "druid.kafka.query.files"); - excludeQuery("fouter_join_ppr.q"); // Disabled in HIVE-19509 - setResultsDir("ql/src/test/results/clientpositive"); setLogDir("itests/qtest/target/qfile-results/clientpositive"); @@ -198,8 +196,6 @@ public MiniDruidKafkaCliConfig() { includesFrom(testConfigProps, "druid.kafka.query.files"); - excludeQuery("druidkafkamini_basic.q"); // Disabled in HIVE-19509 - setResultsDir("ql/src/test/results/clientpositive/druid"); setLogDir("itests/qtest/target/tmp/log"); @@ -224,15 +220,6 @@ public MiniLlapLocalCliConfig() { includesFrom(testConfigProps, "minillaplocal.query.files"); includesFrom(testConfigProps, "minillaplocal.shared.query.files"); - excludeQuery("bucket_map_join_tez1.q"); // Disabled in HIVE-19509 - excludeQuery("special_character_in_tabnames_1.q"); // Disabled in HIVE-19509 - excludeQuery("sysdb.q"); // Disabled in HIVE-19509 - 
excludeQuery("tez_smb_1.q"); // Disabled in HIVE-19509 - excludeQuery("union_fast_stats.q"); // Disabled in HIVE-19509 - excludeQuery("schema_evol_orc_acidvec_part.q"); // Disabled in HIVE-19509 - excludeQuery("schema_evol_orc_vec_part_llap_io.q"); // Disabled in HIVE-19509 - excludeQuery("tez_dynpart_hashjoin_1.q"); // Disabled in HIVE-19509 - excludeQuery("tez_vector_dynpart_hashjoin_1.q"); // Disabled in HIVE-19509 setResultsDir("ql/src/test/results/clientpositive/llap"); setLogDir("itests/qtest/target/qfile-results/clientpositive"); @@ -373,8 +360,6 @@ public NegativeCliConfig() { excludesFrom(testConfigProps, "minimr.query.negative.files"); excludesFrom(testConfigProps, "spark.only.query.negative.files"); excludeQuery("authorization_uri_import.q"); - excludeQuery("merge_negative_5.q"); // Disabled in HIVE-19509 - excludeQuery("mm_concatenate.q"); // Disabled in HIVE-19509 setResultsDir("ql/src/test/results/clientnegative"); setLogDir("itests/qtest/target/qfile-results/clientnegative"); diff --git itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java index 16571b3ff3..750fc69c5f 100644 --- itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java +++ itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java @@ -36,7 +36,6 @@ import java.io.PrintStream; import java.io.Serializable; import java.io.StringWriter; -import java.io.UnsupportedEncodingException; import java.net.URL; import java.nio.charset.StandardCharsets; import java.sql.SQLException; @@ -1205,13 +1204,11 @@ private void initDataSetForTest(File file){ DatasetCollection datasets = parser.getDatasets(); for (String table : datasets.getTables()){ - synchronized (QTestUtil.class){ - initDataset(table); - } + initDataset(table); } } - protected void initDataset(String table) { + protected synchronized void initDataset(String table) { if (getSrcTables().contains(table)){ return; } @@ -1278,7 +1275,7 @@ public String cliInit(File file, boolean recreate) throws Exception { initDataSetForTest(file); HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER, - "org.apache.hadoop.hive.ql.security.DummyAuthenticator"); + "org.apache.hadoop.hive.ql.security.DummyAuthenticator"); Utilities.clearWorkMap(conf); CliSessionState ss = new CliSessionState(conf); assert ss != null; @@ -1295,30 +1292,6 @@ public String cliInit(File file, boolean recreate) throws Exception { } File outf = new File(logDir, stdoutName); - - setSessionOutputs(fileName, ss, outf); - - SessionState oldSs = SessionState.get(); - - boolean canReuseSession = !qNoSessionReuseQuerySet.contains(fileName); - restartSessions(canReuseSession, ss, oldSs); - - closeSession(oldSs); - - SessionState.start(ss); - - cliDriver = new CliDriver(); - - if (fileName.equals("init_file.q")) { - ss.initFiles.add(AbstractCliConfig.HIVE_ROOT + "/data/scripts/test_init_file.sql"); - } - cliDriver.processInitFiles(ss); - - return outf.getAbsolutePath(); - } - - private void setSessionOutputs(String fileName, CliSessionState ss, File outf) - throws FileNotFoundException, Exception, UnsupportedEncodingException { OutputStream fo = new BufferedOutputStream(new FileOutputStream(outf)); if (qSortQuerySet.contains(fileName)) { ss.out = new SortPrintStream(fo, "UTF-8"); @@ -1331,12 +1304,10 @@ private void setSessionOutputs(String fileName, CliSessionState ss, File outf) } ss.err = new CachingPrintStream(fo, true, "UTF-8"); ss.setIsSilent(true); - } + SessionState oldSs = SessionState.get(); - 
private void restartSessions(boolean canReuseSession, CliSessionState ss, SessionState oldSs) - throws IOException { - if (oldSs != null && canReuseSession - && clusterType.getCoreClusterType() == CoreClusterType.TEZ) { + boolean canReuseSession = !qNoSessionReuseQuerySet.contains(fileName); + if (oldSs != null && canReuseSession && clusterType.getCoreClusterType() == CoreClusterType.TEZ) { // Copy the tezSessionState from the old CliSessionState. TezSessionState tezSessionState = oldSs.getTezSession(); oldSs.setTezSession(null); @@ -1350,9 +1321,27 @@ private void restartSessions(boolean canReuseSession, CliSessionState ss, Sessio oldSs.setSparkSession(null); oldSs.close(); } + + if (oldSs != null && oldSs.out != null && oldSs.out != System.out) { + oldSs.out.close(); + } + if (oldSs != null) { + oldSs.close(); + } + SessionState.start(ss); + + cliDriver = new CliDriver(); + + if (fileName.equals("init_file.q")) { + ss.initFiles.add(AbstractCliConfig.HIVE_ROOT + "/data/scripts/test_init_file.sql"); + } + cliDriver.processInitFiles(ss); + + return outf.getAbsolutePath(); } - private CliSessionState startSessionState(boolean canReuseSession) throws IOException { + private CliSessionState startSessionState(boolean canReuseSession) + throws IOException { HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER, "org.apache.hadoop.hive.ql.security.DummyAuthenticator"); @@ -1366,25 +1355,32 @@ private CliSessionState startSessionState(boolean canReuseSession) throws IOExce ss.err = System.out; SessionState oldSs = SessionState.get(); + if (oldSs != null && canReuseSession && clusterType.getCoreClusterType() == CoreClusterType.TEZ) { + // Copy the tezSessionState from the old CliSessionState. + TezSessionState tezSessionState = oldSs.getTezSession(); + ss.setTezSession(tezSessionState); + oldSs.setTezSession(null); + oldSs.close(); + } - restartSessions(canReuseSession, ss, oldSs); - - closeSession(oldSs); - SessionState.start(ss); - - isSessionStateStarted = true; - - conf.set("hive.execution.engine", execEngine); - return ss; - } - - private void closeSession(SessionState oldSs) throws IOException { + if (oldSs != null && clusterType.getCoreClusterType() == CoreClusterType.SPARK) { + sparkSession = oldSs.getSparkSession(); + ss.setSparkSession(sparkSession); + oldSs.setSparkSession(null); + oldSs.close(); + } if (oldSs != null && oldSs.out != null && oldSs.out != System.out) { oldSs.out.close(); } if (oldSs != null) { oldSs.close(); } + SessionState.start(ss); + + isSessionStateStarted = true; + + conf.set("hive.execution.engine", execEngine); + return ss; } public int executeAdhocCommand(String q) { @@ -1995,7 +1991,6 @@ public QTRunner(QTestUtil qt, File file) { @Override public void run() { try { - qt.startSessionState(false); // assumption is that environment has already been cleaned once globally // hence each thread does not call cleanUp() and createSources() again qt.cliInit(file, false); diff --git itests/util/src/test/java/org/apache/hadoop/hive/cli/control/TestDanglingQOuts.java itests/util/src/test/java/org/apache/hadoop/hive/cli/control/TestDanglingQOuts.java index 33caeb15ef..500d11463b 100644 --- itests/util/src/test/java/org/apache/hadoop/hive/cli/control/TestDanglingQOuts.java +++ itests/util/src/test/java/org/apache/hadoop/hive/cli/control/TestDanglingQOuts.java @@ -87,7 +87,6 @@ public TestDanglingQOuts() throws Exception { } } - @Ignore("Disabling till HIVE-19509 gets solved") @Test public void checkDanglingQOut() { SetView dangling = Sets.difference(outsFound, 
outsNeeded.keySet()); diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java index 05282db163..fc0c66a888 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java @@ -296,9 +296,6 @@ private void repositionInStreams(TreeReaderFactory.TreeReader[] columnReaders, ConsumerStripeMetadata stripeMetadata) throws IOException { PositionProvider[] pps = createPositionProviders( columnReaders, batch.getBatchKey(), stripeMetadata); - if (LlapIoImpl.ORC_LOGGER.isTraceEnabled()) { - LlapIoImpl.ORC_LOGGER.trace("Created pps {}", Arrays.toString(pps)); - } if (pps == null) return; for (int i = 0; i < columnReaders.length; i++) { TreeReader reader = columnReaders[i]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 63fe8adc8b..abde9f786f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -4330,11 +4330,6 @@ private static StorageDescriptor retrieveStorageDescriptor(Table tbl, Partition } tbl.setStoredAsSubDirectories(alterTbl.isStoredAsSubDirectories()); - } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.OWNER) { - if (alterTbl.getOwnerPrincipal() != null) { - tbl.setOwner(alterTbl.getOwnerPrincipal().getName()); - tbl.setOwnerType(alterTbl.getOwnerPrincipal().getType()); - } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ALTERSKEWEDLOCATION) { // process location one-by-one Map,String> locMaps = alterTbl.getSkewedLocations(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index a1f549a367..4611ce9a28 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -214,7 +214,7 @@ system.registerUDF("rand", UDFRand.class, false); system.registerGenericUDF("abs", GenericUDFAbs.class); system.registerGenericUDF("sq_count_check", GenericUDFSQCountCheck.class); - system.registerGenericUDF("enforce_constraint", GenericUDFEnforceConstraint.class); + system.registerGenericUDF("enforce_constraint", GenericUDFEnforceNotNullConstraint.class); system.registerGenericUDF("pmod", GenericUDFPosMod.class); system.registerUDF("ln", UDFLn.class, false); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionTask.java index b9d6f587dd..a0a90a96f0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionTask.java @@ -30,7 +30,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Function; import org.apache.hadoop.hive.metastore.api.PrincipalType; import org.apache.hadoop.hive.metastore.api.ResourceType; @@ -166,9 +165,7 @@ private int createPermanentFunction(Hive db, CreateFunctionDesc createFunctionDe checkLocalFunctionResources(db, createFunctionDesc.getResources()); FunctionInfo registered = null; - HiveConf oldConf = SessionState.get().getConf(); try { - SessionState.get().setConf(conf); registered = 
FunctionRegistry.registerPermanentFunction( registeredName, className, true, toFunctionResource(resources)); } catch (RuntimeException ex) { @@ -176,10 +173,7 @@ private int createPermanentFunction(Hive db, CreateFunctionDesc createFunctionDe while (t.getCause() != null) { t = t.getCause(); } - } finally { - SessionState.get().setConf(oldConf); } - if (registered == null) { console.printError("Failed to register " + registeredName + " using class " + createFunctionDesc.getClassName()); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 491a6b1257..2cad04b767 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -390,6 +390,9 @@ public DataTypePhysicalVariation getDataTypePhysicalVariation(int columnNum) thr if (initialDataTypePhysicalVariations == null) { return null; } + if (columnNum < 0) { + fake++; + } if (columnNum < initialDataTypePhysicalVariations.size()) { return initialDataTypePhysicalVariations.get(columnNum); } @@ -1679,6 +1682,8 @@ private VectorExpression getDecimal64VectorExpressionForUdf(GenericUDF genericUd return vectorExpression; } + static int fake = 0; + private VectorExpression getVectorExpressionForUdf(GenericUDF genericUdf, Class udfClass, List childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnColumn.txt ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java similarity index 56% rename from ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnColumn.txt rename to ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java index e8ef279958..13e5fff7c4 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnColumn.txt +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java @@ -15,11 +15,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +package org.apache.hadoop.hive.ql.exec.vector.expressions; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -29,7 +28,7 @@ * The first is always a boolean (LongColumnVector). * The second and third are long columns or long expression results. 
*/ -public class extends VectorExpression { +public class IfExprIntervalDayTimeColumnColumn extends VectorExpression { private static final long serialVersionUID = 1L; @@ -37,7 +36,7 @@ private final int arg2Column; private final int arg3Column; - public (int arg1Column, int arg2Column, int arg3Column, + public IfExprIntervalDayTimeColumnColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumnNum) { super(outputColumnNum); this.arg1Column = arg1Column; @@ -45,7 +44,7 @@ this.arg3Column = arg3Column; } - public () { + public IfExprIntervalDayTimeColumnColumn() { super(); // Dummy final assignments. @@ -62,17 +61,12 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; - arg2ColVector = () batch.cols[arg2Column]; - boolean[] arg2IsNull = arg2ColVector.isNull; - arg3ColVector = () batch.cols[arg3Column]; - boolean[] arg3IsNull = arg3ColVector.isNull; - outputColVector = () batch.cols[outputColumnNum]; + IntervalDayTimeColumnVector arg2ColVector = (IntervalDayTimeColumnVector) batch.cols[arg2Column]; + IntervalDayTimeColumnVector arg3ColVector = (IntervalDayTimeColumnVector) batch.cols[arg3Column]; + IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - // We do not need to do a column reset since we are carefully changing the output. - outputColVector.isRepeating = false; - int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -81,6 +75,9 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + /* All the code paths below propagate nulls even if neither arg2 nor arg3 * have nulls. This is to reduce the number of code paths and shorten the * code, at the expense of maybe doing unnecessary work if neither input @@ -101,89 +98,53 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { arg3ColVector.flatten(batch.selectedInUse, sel, n); if (arg1ColVector.noNulls) { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - if (vector1[i] == 1) { - if (!arg2IsNull[i]) { - outputIsNull[i] = false; - outputColVector.set(i, arg2ColVector.asScratch(i)); - } else { - outputIsNull[i] = true; - outputColVector.noNulls = false; - } - } else { - if (!arg3IsNull[i]) { - outputIsNull[i] = false; - outputColVector.set(i, arg3ColVector.asScratch(i)); - } else { - outputIsNull[i] = true; - outputColVector.noNulls = false; - } - } + outputIsNull[i] = (vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + outputColVector.set(i, vector1[i] == 1 ? 
arg2ColVector.asScratchIntervalDayTime(i) : arg3ColVector.asScratchIntervalDayTime(i)); } } else { for(int i = 0; i != n; i++) { - if (vector1[i] == 1) { - if (!arg2IsNull[i]) { - outputIsNull[i] = false; - outputColVector.set(i, arg2ColVector.asScratch(i)); - } else { - outputIsNull[i] = true; - outputColVector.noNulls = false; - } - } else { - if (!arg3IsNull[i]) { - outputIsNull[i] = false; - outputColVector.set(i, arg3ColVector.asScratch(i)); - } else { - outputIsNull[i] = true; - outputColVector.noNulls = false; - } - } + outputIsNull[i] = (vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchIntervalDayTime(i) : arg3ColVector.asScratchIntervalDayTime(i)); } } - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - if (!arg1ColVector.isNull[i] && vector1[i] == 1) { - if (!arg2IsNull[i]) { - outputIsNull[i] = false; - outputColVector.set(i, arg2ColVector.asScratch(i)); - } else { - outputIsNull[i] = true; - outputColVector.noNulls = false; - } - } else { - if (!arg3IsNull[i]) { - outputIsNull[i] = false; - outputColVector.set(i, arg3ColVector.asScratch(i)); - } else { - outputIsNull[i] = true; - outputColVector.noNulls = false; - } - } + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.asScratchIntervalDayTime(i) : arg3ColVector.asScratchIntervalDayTime(i)); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); } } else { for(int i = 0; i != n; i++) { - if (!arg1ColVector.isNull[i] && vector1[i] == 1) { - if (!arg2IsNull[i]) { - outputIsNull[i] = false; - outputColVector.set(i, arg2ColVector.asScratch(i)); - } else { - outputIsNull[i] = true; - outputColVector.noNulls = false; - } - } else { - if (!arg3IsNull[i]) { - outputIsNull[i] = false; - outputColVector.set(i, arg3ColVector.asScratch(i)); - } else { - outputIsNull[i] = true; - outputColVector.noNulls = false; - } - } + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.asScratchIntervalDayTime(i) : arg3ColVector.asScratchIntervalDayTime(i)); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); } } } @@ -207,11 +168,11 @@ public String vectorExpressionParameters() { .setNumArguments(3) .setArgumentTypes( VectorExpressionDescriptor.ArgumentType.getType("int_family"), - VectorExpressionDescriptor.ArgumentType.getType(""), - VectorExpressionDescriptor.ArgumentType.getType("")) + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time"), + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); } -} +} \ No newline at end of file diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnScalar.txt ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java similarity index 60% rename from ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnScalar.txt rename to ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java index 56ae2ca8d9..d5fb6a0f17 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnScalar.txt +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java @@ -16,14 +16,15 @@ * limitations under the License. */ -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import ; import org.apache.hadoop.hive.ql.metadata.HiveException; /** @@ -32,15 +33,15 @@ * The second is a column or non-constant expression result. * The third is a constant value. */ -public class extends VectorExpression { +public class IfExprIntervalDayTimeColumnScalar extends VectorExpression { private static final long serialVersionUID = 1L; private final int arg1Column; private final int arg2Column; - private final arg3Scalar; + private final HiveIntervalDayTime arg3Scalar; - public (int arg1Column, int arg2Column, arg3Scalar, + public IfExprIntervalDayTimeColumnScalar(int arg1Column, int arg2Column, HiveIntervalDayTime arg3Scalar, int outputColumnNum) { super(outputColumnNum); this.arg1Column = arg1Column; @@ -48,7 +49,7 @@ this.arg3Scalar = arg3Scalar; } - public () { + public IfExprIntervalDayTimeColumnScalar() { super(); // Dummy final assignments. 
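On the IfExprIntervalDayTime* classes above and below: the evaluate loops fold the IF condition into each per-row assignment so that both the output value and its null flag are selected in one pass, instead of branching around the assignment. A stripped-down sketch of that pattern on plain long/boolean arrays (a hypothetical helper, not code from the patch):

    public class IfExprPatternSketch {
      // out[i] and outNull[i] take arg2's value/null flag when the boolean
      // column is 1 (and not null), otherwise arg3's. The caller is expected
      // to clear noNulls on the output, mirroring outputColVector.noNulls = false
      // in the hunks above.
      static void ifExpr(long[] cond, boolean[] condNull,
                         long[] arg2, boolean[] arg2Null,
                         long[] arg3, boolean[] arg3Null,
                         long[] out, boolean[] outNull, int n) {
        for (int i = 0; i < n; i++) {
          boolean takeArg2 = !condNull[i] && cond[i] == 1;
          out[i] = takeArg2 ? arg2[i] : arg3[i];
          outNull[i] = takeArg2 ? arg2Null[i] : arg3Null[i];
        }
      }
    }

The column-scalar variants differ only in that the scalar side contributes a constant value and a false null flag, as in the arg2ColVector.isNull[i] : false assignments above.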
@@ -65,9 +66,8 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; - arg2ColVector = () batch.cols[arg2Column]; - boolean[] arg2IsNull = arg2ColVector.isNull; - outputColVector = () batch.cols[outputColumnNum]; + IntervalDayTimeColumnVector arg2ColVector = (IntervalDayTimeColumnVector) batch.cols[arg2Column]; + IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; @@ -96,72 +96,42 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { arg2ColVector.flatten(batch.selectedInUse, sel, n); if (arg1ColVector.noNulls) { - - // FUTURE: We could check arg2ColVector.noNulls and optimize these loops. if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - if (vector1[i] == 1) { - if (!arg2IsNull[i]) { - outputIsNull[i] = false; - outputColVector.set(i, arg2ColVector.asScratch(i)); - } else { - outputIsNull[i] = true; - outputColVector.noNulls = false; - } - } else { - outputIsNull[i] = false; - outputColVector.set(i, arg3Scalar); - } + outputIsNull[i] = false; + outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchIntervalDayTime(i) : arg3Scalar); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { - if (vector1[i] == 1) { - if (!arg2IsNull[i]) { - outputIsNull[i] = false; - outputColVector.set(i, arg2ColVector.asScratch(i)); - } else { - outputIsNull[i] = true; - outputColVector.noNulls = false; - } - } else { - outputIsNull[i] = false; - outputColVector.set(i, arg3Scalar); - } + outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchIntervalDayTime(i) : arg3Scalar); } } } else /* there are nulls */ { + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - if (!arg1ColVector.isNull[i] && vector1[i] == 1) { - if (!arg2IsNull[i]) { - outputIsNull[i] = false; - outputColVector.set(i, arg2ColVector.asScratch(i)); - } else { - outputIsNull[i] = true; - outputColVector.noNulls = false; - } - } else { - outputIsNull[i] = false; - outputColVector.set(i, arg3Scalar); - } + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.asScratchIntervalDayTime(i) : arg3Scalar); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.isNull[i] : false); } } else { for(int i = 0; i != n; i++) { - if (!arg1ColVector.isNull[i] && vector1[i] == 1) { - if (!arg2IsNull[i]) { - outputIsNull[i] = false; - outputColVector.set(i, arg2ColVector.asScratch(i)); - } else { - outputIsNull[i] = true; - outputColVector.noNulls = false; - } - } else { - outputIsNull[i] = false; - outputColVector.set(i, arg3Scalar); - } + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.asScratchIntervalDayTime(i) : arg3Scalar); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
+ arg2ColVector.isNull[i] : false); } } } @@ -173,7 +143,7 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { @Override public String vectorExpressionParameters() { return getColumnParamString(0, arg1Column) + ", " + getColumnParamString(1, arg2Column) + - ", val "+ arg3Scalar; + ", val "+ arg3Scalar.toString(); } @Override @@ -184,11 +154,11 @@ public String vectorExpressionParameters() { .setNumArguments(3) .setArgumentTypes( VectorExpressionDescriptor.ArgumentType.getType("int_family"), - VectorExpressionDescriptor.ArgumentType.getType(""), - VectorExpressionDescriptor.ArgumentType.getType("")) + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time"), + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); } -} +} \ No newline at end of file diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarColumn.txt ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java similarity index 59% rename from ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarColumn.txt rename to ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java index 271b589e4a..53466e59c3 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarColumn.txt +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java @@ -16,14 +16,15 @@ * limitations under the License. */ -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import ; import org.apache.hadoop.hive.ql.metadata.HiveException; /** @@ -32,23 +33,23 @@ * The second is a column or non-constant expression result. * The third is a constant value. */ -public class extends VectorExpression { +public class IfExprIntervalDayTimeScalarColumn extends VectorExpression { private static final long serialVersionUID = 1L; private final int arg1Column; - private arg2Scalar; + private final HiveIntervalDayTime arg2Scalar; private final int arg3Column; - public (int arg1Column, arg2Scalar, int arg3Column, - int outputColumnNum) { + public IfExprIntervalDayTimeScalarColumn(int arg1Column, HiveIntervalDayTime arg2Scalar, + int arg3Column, int outputColumnNum) { super(outputColumnNum); this.arg1Column = arg1Column; this.arg2Scalar = arg2Scalar; this.arg3Column = arg3Column; } - public () { + public IfExprIntervalDayTimeScalarColumn() { super(); // Dummy final assignments. 
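The ColumnScalar and ScalarColumn variants in the hunks above treat the constant side specially: a scalar argument can never be NULL, so its null flag is the literal false (hence "arg2ColVector.isNull[i] : false" and, below, "false : arg3ColVector.isNull[i]"). A minimal sketch of that rule, again with plain arrays as hypothetical stand-ins for the ColumnVector fields:

public class IfExprColumnScalarNullSketch {
  public static void main(String[] args) {
    long[] cond        = {1, 1, 0};
    long[] thenVal     = {10, 11, 12};
    boolean[] thenNull = {false, true, false};
    long elseScalar    = 99;  // constant ELSE branch, never NULL

    int n = cond.length;
    long[] outVal = new long[n];
    boolean[] outNull = new boolean[n];

    for (int i = 0; i < n; i++) {
      outVal[i] = cond[i] == 1 ? thenVal[i] : elseScalar;
      // Only the column side can contribute a NULL; the scalar side is always non-NULL.
      outNull[i] = cond[i] == 1 ? thenNull[i] : false;
    }
    for (int i = 0; i < n; i++) {
      System.out.println(outNull[i] ? "NULL" : Long.toString(outVal[i]));
    }
  }
}

Because the scalar side never introduces NULLs, the no-NULLs branches of these classes can also clear isNull in bulk with Arrays.fill and then safely set noNulls back to true, as the ScalarColumn hunk below does.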
@@ -65,9 +66,8 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; - arg3ColVector = () batch.cols[arg3Column]; - boolean[] arg3IsNull = arg3ColVector.isNull; - outputColVector = () batch.cols[outputColumnNum]; + IntervalDayTimeColumnVector arg3ColVector = (IntervalDayTimeColumnVector) batch.cols[arg3Column]; + IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; @@ -98,72 +98,62 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { arg3ColVector.flatten(batch.selectedInUse, sel, n); if (arg1ColVector.noNulls) { - - // FUTURE: We could check arg3ColVector.noNulls and optimize these loops. if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (vector1[i] == 1) { - outputIsNull[i] = false; - outputColVector.set(i, arg2Scalar); - } else { - if (!arg3IsNull[i]) { - outputIsNull[i] = false; - outputColVector.set(i, arg3ColVector.asScratch(i)); - } else { - outputIsNull[i] = true; - outputColVector.noNulls = false; - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + outputColVector.set( + i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchIntervalDayTime(i)); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputColVector.set( + i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchIntervalDayTime(i)); } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - if (vector1[i] == 1) { - outputIsNull[i] = false; - outputColVector.set(i, arg2Scalar); - } else { - if (!arg3IsNull[i]) { - outputIsNull[i] = false; - outputColVector.set(i, arg3ColVector.asScratch(i)); - } else { - outputIsNull[i] = true; - outputColVector.noNulls = false; - } - } + outputColVector.set( + i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchIntervalDayTime(i)); } } } else /* there are nulls */ { + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - if (!arg1ColVector.isNull[i] && vector1[i] == 1) { - outputIsNull[i] = false; - outputColVector.set(i, arg2Scalar); - } else { - if (!arg3IsNull[i]) { - outputIsNull[i] = false; - outputColVector.set(i, arg3ColVector.asScratch(i)); - } else { - outputIsNull[i] = true; - outputColVector.noNulls = false; - } - } + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3ColVector.asScratchIntervalDayTime(i)); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
+ false : arg3ColVector.isNull[i]); } } else { for(int i = 0; i != n; i++) { - if (!arg1ColVector.isNull[i] && vector1[i] == 1) { - outputIsNull[i] = false; - outputColVector.set(i, arg2Scalar); - } else { - if (!arg3IsNull[i]) { - outputIsNull[i] = false; - outputColVector.set(i, arg3ColVector.asScratch(i)); - } else { - outputIsNull[i] = true; - outputColVector.noNulls = false; - } - } + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3ColVector.asScratchIntervalDayTime(i)); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + false : arg3ColVector.isNull[i]); } } } @@ -174,8 +164,7 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { @Override public String vectorExpressionParameters() { - return getColumnParamString(0, arg1Column) + ", val "+ arg2Scalar + ", " + - getColumnParamString(2, arg3Column); + return getColumnParamString(0, arg1Column) + ", val "+ arg2Scalar + ", col "+ arg3Column; } @Override @@ -186,8 +175,8 @@ public String vectorExpressionParameters() { .setNumArguments(3) .setArgumentTypes( VectorExpressionDescriptor.ArgumentType.getType("int_family"), - VectorExpressionDescriptor.ArgumentType.getType(""), - VectorExpressionDescriptor.ArgumentType.getType("")) + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time"), + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.SCALAR, diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarScalar.txt ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java similarity index 82% rename from ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarScalar.txt rename to ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java index 10f97dfcad..177f358899 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarScalar.txt +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java @@ -16,16 +16,15 @@ * limitations under the License. */ -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.util.Arrays; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import ; import org.apache.hadoop.hive.ql.metadata.HiveException; /** @@ -34,23 +33,23 @@ * The second is a constant value. * The third is a constant value. 
*/ -public class extends VectorExpression { +public class IfExprIntervalDayTimeScalarScalar extends VectorExpression { private static final long serialVersionUID = 1L; private final int arg1Column; - private final arg2Scalar; - private final arg3Scalar; + private final HiveIntervalDayTime arg2Scalar; + private final HiveIntervalDayTime arg3Scalar; - public (int arg1Column, arg2Scalar, arg3Scalar, - int outputColumnNum) { + public IfExprIntervalDayTimeScalarScalar(int arg1Column, HiveIntervalDayTime arg2Scalar, + HiveIntervalDayTime arg3Scalar, int outputColumnNum) { super(outputColumnNum); this.arg1Column = arg1Column; this.arg2Scalar = arg2Scalar; this.arg3Scalar = arg3Scalar; } - public () { + public IfExprIntervalDayTimeScalarScalar() { super(); // Dummy final assignments. @@ -67,7 +66,7 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; - outputColVector = () batch.cols[outputColumnNum]; + IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; @@ -91,10 +90,6 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { return; } - /* - * Since we always set a value, make sure all isNull entries are set to false. - */ - if (arg1ColVector.noNulls) { if (batch.selectedInUse) { @@ -126,19 +121,28 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { } } } else /* there are nulls */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = false; outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2Scalar : arg3Scalar); + outputIsNull[i] = false; } } else { - Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? 
arg2Scalar : arg3Scalar); } + Arrays.fill(outputIsNull, 0, n, false); } } } @@ -156,8 +160,8 @@ public String vectorExpressionParameters() { .setNumArguments(3) .setArgumentTypes( VectorExpressionDescriptor.ArgumentType.getType("int_family"), - VectorExpressionDescriptor.ArgumentType.getType(""), - VectorExpressionDescriptor.ArgumentType.getType("")) + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time"), + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.SCALAR, diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java index 2a10e29ddd..75de7a090d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java @@ -60,7 +60,6 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; - boolean[] arg1IsNull = arg1ColVector.isNull; LongColumnVector arg2ColVector = (LongColumnVector) batch.cols[arg2Column]; LongColumnVector arg3ColVector = (LongColumnVector) batch.cols[arg3Column]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; @@ -88,7 +87,7 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { * of code paths. */ if (arg1ColVector.isRepeating) { - if ((arg1ColVector.noNulls || !arg1IsNull[0]) && vector1[0] == 1) { + if (vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); @@ -122,14 +121,14 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = (!arg1IsNull[i] && vector1[i] == 1 ? + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? vector2[i] : vector3[i]); outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); } } else { for(int i = 0; i != n; i++) { - outputVector[i] = (!arg1IsNull[i] && vector1[i] == 1 ? + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? vector2[i] : vector3[i]); outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumn.java new file mode 100644 index 0000000000..579eeade51 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumn.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second and third are long columns or long expression results. + */ +public class IfExprTimestampColumnColumn extends IfExprTimestampColumnColumnBase { + + private static final long serialVersionUID = 1L; + + public IfExprTimestampColumnColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumnNum) { + super(arg1Column, arg2Column, arg3Column, outputColumnNum); + } + + public IfExprTimestampColumnColumn() { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("int_family"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java new file mode 100644 index 0000000000..fc7a859a3a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second and third are long columns or long expression results. 
+ */ +public abstract class IfExprTimestampColumnColumnBase extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private final int arg1Column; + private final int arg2Column; + private final int arg3Column; + + public IfExprTimestampColumnColumnBase(int arg1Column, int arg2Column, int arg3Column, + int outputColumnNum) { + super(outputColumnNum); + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; + this.arg3Column = arg3Column; + } + + public IfExprTimestampColumnColumnBase() { + super(); + + // Dummy final assignments. + arg1Column = -1; + arg2Column = -1; + arg3Column = -1; + } + + @Override + public void evaluate(VectorizedRowBatch batch) throws HiveException { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + TimestampColumnVector arg2ColVector = (TimestampColumnVector) batch.cols[arg2Column]; + TimestampColumnVector arg3ColVector = (TimestampColumnVector) batch.cols[arg3Column]; + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + /* All the code paths below propagate nulls even if neither arg2 nor arg3 + * have nulls. This is to reduce the number of code paths and shorten the + * code, at the expense of maybe doing unnecessary work if neither input + * has nulls. This could be improved in the future by expanding the number + * of code paths. + */ + if (arg1ColVector.isRepeating) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { + arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } else { + arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } + return; + } + + // extend any repeating values and noNulls indicator in the inputs + arg2ColVector.flatten(batch.selectedInUse, sel, n); + arg3ColVector.flatten(batch.selectedInUse, sel, n); + + /* + * Do careful maintenance of NULLs. + */ + outputColVector.noNulls = false; + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchTimestamp(i) : arg3ColVector.asScratchTimestamp(i)); + outputIsNull[i] = (vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchTimestamp(i) : arg3ColVector.asScratchTimestamp(i)); + outputIsNull[i] = (vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.asScratchTimestamp(i) : arg3ColVector.asScratchTimestamp(i)); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? 
+ arg2ColVector.asScratchTimestamp(i) : arg3ColVector.asScratchTimestamp(i)); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } + } + + // restore repeating and no nulls indicators + arg2ColVector.unFlatten(); + arg3ColVector.unFlatten(); + } + + @Override + public String vectorExpressionParameters() { + return getColumnParamString(0, arg1Column) + ", " + getColumnParamString(1, arg2Column) + + getColumnParamString(2, arg3Column); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalar.java new file mode 100644 index 0000000000..fcd7ca43e3 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalar.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.sql.Timestamp; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second is a column or non-constant expression result. + * The third is a constant value. 
+ */ +public class IfExprTimestampColumnScalar extends IfExprTimestampColumnScalarBase { + + private static final long serialVersionUID = 1L; + + public IfExprTimestampColumnScalar(int arg1Column, int arg2Column, Timestamp arg3Scalar, + int outputColumnNum) { + super(arg1Column, arg2Column, arg3Scalar, outputColumnNum); + } + + public IfExprTimestampColumnScalar() { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("int_family"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java new file mode 100644 index 0000000000..64add366b6 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.sql.Timestamp; +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second is a column or non-constant expression result. + * The third is a constant value. + */ +public abstract class IfExprTimestampColumnScalarBase extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private final int arg1Column; + private final int arg2Column; + private final Timestamp arg3Scalar; + + public IfExprTimestampColumnScalarBase(int arg1Column, int arg2Column, Timestamp arg3Scalar, + int outputColumnNum) { + super(outputColumnNum); + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; + this.arg3Scalar = arg3Scalar; + } + + public IfExprTimestampColumnScalarBase() { + super(); + + // Dummy final assignments. 
+ arg1Column = -1; + arg2Column = -1; + arg3Scalar = null; + } + + @Override + public void evaluate(VectorizedRowBatch batch) throws HiveException { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + TimestampColumnVector arg2ColVector = (TimestampColumnVector) batch.cols[arg2Column]; + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (arg1ColVector.isRepeating) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { + arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } else { + outputColVector.fill(arg3Scalar); + } + return; + } + + // Extend any repeating values and noNulls indicator in the inputs to + // reduce the number of code paths needed below. + arg2ColVector.flatten(batch.selectedInUse, sel, n); + + /* + * Since we always set a value, make sure all isNull entries are set to false. + */ + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchTimestamp(i) : arg3Scalar); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchTimestamp(i) : arg3Scalar); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.asScratchTimestamp(i) : arg3Scalar); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.isNull[i] : false); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.asScratchTimestamp(i) : arg3Scalar); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.isNull[i] : false); + } + } + } + + // restore repeating and no nulls indicators + arg2ColVector.unFlatten(); + } + + @Override + public String vectorExpressionParameters() { + return getColumnParamString(0, arg1Column) + ", " + getColumnParamString(1, arg2Column) + + ", val "+ arg3Scalar; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumn.java new file mode 100644 index 0000000000..510774a263 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumn.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.sql.Timestamp; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second is a column or non-constant expression result. + * The third is a constant value. + */ +public class IfExprTimestampScalarColumn extends IfExprTimestampScalarColumnBase { + + private static final long serialVersionUID = 1L; + + public IfExprTimestampScalarColumn(int arg1Column, Timestamp arg2Scalar, int arg3Column, + int outputColumnNum) { + super(arg1Column, arg2Scalar, arg3Column, outputColumnNum); + } + + public IfExprTimestampScalarColumn() { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("int_family"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java new file mode 100644 index 0000000000..73044adb8a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.sql.Timestamp; +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second is a column or non-constant expression result. + * The third is a constant value. + */ +public abstract class IfExprTimestampScalarColumnBase extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private final int arg1Column; + private Timestamp arg2Scalar; + private final int arg3Column; + + public IfExprTimestampScalarColumnBase(int arg1Column, Timestamp arg2Scalar, int arg3Column, + int outputColumnNum) { + super(outputColumnNum); + this.arg1Column = arg1Column; + this.arg2Scalar = arg2Scalar; + this.arg3Column = arg3Column; + } + + public IfExprTimestampScalarColumnBase() { + super(); + + // Dummy final assignments. + arg1Column = -1; + arg2Scalar = null; + arg3Column = -1; + } + + @Override + public void evaluate(VectorizedRowBatch batch) throws HiveException { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + TimestampColumnVector arg3ColVector = (TimestampColumnVector) batch.cols[arg3Column]; + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (arg1ColVector.isRepeating) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { + outputColVector.fill(arg2Scalar); + } else { + arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } + return; + } + + // Extend any repeating values and noNulls indicator in the inputs to + // reduce the number of code paths needed below. + // This could be optimized in the future by having separate paths + // for when arg3ColVector is repeating or has no nulls. + arg3ColVector.flatten(batch.selectedInUse, sel, n); + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + outputColVector.set( + i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchTimestamp(i)); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputColVector.set( + i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchTimestamp(i)); + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. 
+ Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for(int i = 0; i != n; i++) { + outputColVector.set( + i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchTimestamp(i)); + } + } + } else /* there are nulls */ { + + /* + * Do careful maintenance of NULLs. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3ColVector.asScratchTimestamp(i)); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + false : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3ColVector.asScratchTimestamp(i)); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + false : arg3ColVector.isNull[i]); + } + } + } + + // restore repeating and no nulls indicators + arg3ColVector.unFlatten(); + } + + @Override + public String vectorExpressionParameters() { + return getColumnParamString(0, arg1Column) + ", val "+ arg2Scalar + ", " + + getColumnParamString(2, arg3Column); + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalar.java new file mode 100644 index 0000000000..0c4cce0dbf --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalar.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +import java.sql.Timestamp; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input expressions. + * The first is always a boolean (LongColumnVector). + * The second is a constant value. + * The third is a constant value. 
+ */ +public class IfExprTimestampScalarScalar extends IfExprTimestampScalarScalarBase { + + private static final long serialVersionUID = 1L; + + public IfExprTimestampScalarScalar(int arg1Column, Timestamp arg2Scalar, Timestamp arg3Scalar, + int outputColumnNum) { + super(arg1Column, arg2Scalar, arg3Scalar, outputColumnNum); + } + + public IfExprTimestampScalarScalar() { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("int_family"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java new file mode 100644 index 0000000000..df1418e127 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +import java.sql.Timestamp; +import java.util.Arrays; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input expressions. + * The first is always a boolean (LongColumnVector). + * The second is a constant value. + * The third is a constant value. + */ +public abstract class IfExprTimestampScalarScalarBase extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private final int arg1Column; + private final Timestamp arg2Scalar; + private final Timestamp arg3Scalar; + + public IfExprTimestampScalarScalarBase(int arg1Column, Timestamp arg2Scalar, Timestamp arg3Scalar, + int outputColumnNum) { + super(outputColumnNum); + this.arg1Column = arg1Column; + this.arg2Scalar = arg2Scalar; + this.arg3Scalar = arg3Scalar; + } + + public IfExprTimestampScalarScalarBase() { + super(); + + // Dummy final assignments. 
+ arg1Column = -1; + arg2Scalar = null; + arg3Scalar = null; + } + + @Override + public void evaluate(VectorizedRowBatch batch) throws HiveException { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (arg1ColVector.isRepeating) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { + outputColVector.fill(arg2Scalar); + } else { + outputColVector.fill(arg3Scalar); + } + return; + } + + /* + * Since we always set a value, make sure all isNull entries are set to false. + */ + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for(int i = 0; i != n; i++) { + outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3Scalar); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? 
+ arg2Scalar : arg3Scalar); + } + } + } + } + + @Override + public String vectorExpressionParameters() { + return getColumnParamString(0, arg1Column) + ", val "+ arg2Scalar + ", val "+ arg3Scalar; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index 3d965c0515..655d10b643 100755 --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -284,11 +284,7 @@ public void configure(JobConf job) { } return inputFormat; } - try { - serde = part.getDeserializer(conf); - } catch (Exception e) { - throw new HiveException("Error creating SerDe for LLAP IO", e); - } + serde = findSerDeForLlapSerDeIf(conf, part); } if (isSupported && isVectorized) { InputFormat wrappedIf = llapIo.getInputFormat(inputFormat, serde); @@ -323,6 +319,27 @@ private static boolean checkInputFormatForLlapEncode(Configuration conf, String return false; } + private static Deserializer findSerDeForLlapSerDeIf( + Configuration conf, PartitionDesc part) throws HiveException { + VectorPartitionDesc vpart = part.getVectorPartitionDesc(); + if (vpart != null) { + VectorMapOperatorReadType old = vpart.getVectorMapOperatorReadType(); + if (old != VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) { + if (LOG.isInfoEnabled()) { + LOG.info("Resetting VectorMapOperatorReadType from " + old + " for partition " + + part.getTableName() + " " + part.getPartSpec()); + } + vpart.setVectorMapOperatorReadType( + VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT); + } + } + try { + return part.getDeserializer(conf); + } catch (Exception e) { + throw new HiveException("Error creating SerDe for LLAP IO", e); + } + } + public static void injectLlapCaches(InputFormat inputFormat, LlapIo llapIo) { LOG.info("Injecting LLAP caches into " + inputFormat.getClass().getCanonicalName()); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java deleted file mode 100644 index 330fa580e7..0000000000 --- ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java +++ /dev/null @@ -1,1179 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.io.arrow; - -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import io.netty.buffer.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.complex.impl.UnionReader; -import org.apache.arrow.vector.complex.impl.UnionWriter; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.BaseWriter; -import org.apache.arrow.vector.complex.writer.BigIntWriter; -import org.apache.arrow.vector.complex.writer.BitWriter; -import org.apache.arrow.vector.complex.writer.DateDayWriter; -import org.apache.arrow.vector.complex.writer.DecimalWriter; -import org.apache.arrow.vector.complex.writer.FieldWriter; -import org.apache.arrow.vector.complex.writer.Float4Writer; -import org.apache.arrow.vector.complex.writer.Float8Writer; -import org.apache.arrow.vector.complex.writer.IntWriter; -import org.apache.arrow.vector.complex.writer.IntervalDayWriter; -import org.apache.arrow.vector.complex.writer.IntervalYearWriter; -import org.apache.arrow.vector.complex.writer.SmallIntWriter; -import org.apache.arrow.vector.complex.writer.TimeStampMilliWriter; -import org.apache.arrow.vector.complex.writer.TinyIntWriter; -import org.apache.arrow.vector.complex.writer.VarBinaryWriter; -import org.apache.arrow.vector.complex.writer.VarCharWriter; -import org.apache.arrow.vector.holders.NullableBigIntHolder; -import org.apache.arrow.vector.holders.NullableBitHolder; -import org.apache.arrow.vector.holders.NullableDateDayHolder; -import org.apache.arrow.vector.holders.NullableFloat4Holder; -import org.apache.arrow.vector.holders.NullableFloat8Holder; -import org.apache.arrow.vector.holders.NullableIntHolder; -import org.apache.arrow.vector.holders.NullableIntervalDayHolder; -import org.apache.arrow.vector.holders.NullableIntervalYearHolder; -import org.apache.arrow.vector.holders.NullableSmallIntHolder; -import org.apache.arrow.vector.holders.NullableTimeStampMilliHolder; -import org.apache.arrow.vector.holders.NullableTinyIntHolder; -import org.apache.arrow.vector.holders.NullableVarBinaryHolder; -import org.apache.arrow.vector.holders.NullableVarCharHolder; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; -import 
org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorAssignRow; -import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.AbstractSerDe; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.SerDeStats; -import org.apache.hadoop.hive.serde2.SerDeUtils; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; -import org.apache.hadoop.io.Writable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.DataInput; -import java.io.DataOutput; -import java.lang.reflect.Method; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.function.IntConsumer; - -import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ARROW_BATCH_SIZE; -import static org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil.createColumnVector; -import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption.WRITABLE; -import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo; -import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromObjectInspector; - -/** - * ArrowColumnarBatchSerDe converts Apache Hive rows to Apache Arrow columns. Its serialized - * class is {@link ArrowWrapperWritable}, which doesn't support {@link - * Writable#readFields(DataInput)} and {@link Writable#write(DataOutput)}. - * - * Followings are known issues of current implementation. - * - * A list column cannot have a decimal column. {@link UnionListWriter} doesn't have an - * implementation for {@link BaseWriter.ListWriter#decimal()}. - * - * A union column can have only one of string, char, varchar fields at a same time. Apache Arrow - * doesn't have string and char, so {@link ArrowColumnarBatchSerDe} uses varchar to simulate - * string and char. They will be considered as a same data type in - * {@link org.apache.arrow.vector.complex.UnionVector}. - * - * Timestamp with local timezone is not supported. {@link VectorAssignRow} doesn't support it. 
- */ -public class ArrowColumnarBatchSerDe extends AbstractSerDe { - public static final Logger LOG = LoggerFactory.getLogger(ArrowColumnarBatchSerDe.class.getName()); - private static final String DEFAULT_ARROW_FIELD_NAME = "[DEFAULT]"; - - private static final int MS_PER_SECOND = 1_000; - private static final int MS_PER_MINUTE = MS_PER_SECOND * 60; - private static final int MS_PER_HOUR = MS_PER_MINUTE * 60; - private static final int MS_PER_DAY = MS_PER_HOUR * 24; - private static final int NS_PER_MS = 1_000_000; - - private BufferAllocator rootAllocator; - - private StructTypeInfo rowTypeInfo; - private StructObjectInspector rowObjectInspector; - private Configuration conf; - private Serializer serializer; - private Deserializer deserializer; - - @Override - public void initialize(Configuration conf, Properties tbl) throws SerDeException { - this.conf = conf; - - rootAllocator = RootAllocatorFactory.INSTANCE.getRootAllocator(conf); - - final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); - final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); - final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl - .getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA); - - // Create an object inspector - final List columnNames; - if (columnNameProperty.length() == 0) { - columnNames = new ArrayList<>(); - } else { - columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter)); - } - final List columnTypes; - if (columnTypeProperty.length() == 0) { - columnTypes = new ArrayList<>(); - } else { - columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); - } - rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); - rowObjectInspector = - (StructObjectInspector) getStandardWritableObjectInspectorFromTypeInfo(rowTypeInfo); - - final List fields = new ArrayList<>(); - final int size = columnNames.size(); - for (int i = 0; i < size; i++) { - fields.add(toField(columnNames.get(i), columnTypes.get(i))); - } - - serializer = new Serializer(new Schema(fields)); - deserializer = new Deserializer(); - } - - private class Serializer { - private final int MAX_BUFFERED_ROWS; - - // Schema - private final StructTypeInfo structTypeInfo; - private final List fieldTypeInfos; - private final int fieldSize; - - // Hive columns - private final VectorizedRowBatch vectorizedRowBatch; - private final VectorAssignRow vectorAssignRow; - private int batchSize; - - // Arrow columns - private final VectorSchemaRoot vectorSchemaRoot; - private final List arrowVectors; - private final List fieldWriters; - - private Serializer(Schema schema) throws SerDeException { - MAX_BUFFERED_ROWS = HiveConf.getIntVar(conf, HIVE_ARROW_BATCH_SIZE); - LOG.info("ArrowColumnarBatchSerDe max number of buffered columns: " + MAX_BUFFERED_ROWS); - - // Schema - structTypeInfo = (StructTypeInfo) getTypeInfoFromObjectInspector(rowObjectInspector); - fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); - fieldSize = fieldTypeInfos.size(); - - // Init Arrow stuffs - vectorSchemaRoot = VectorSchemaRoot.create(schema, rootAllocator); - arrowVectors = vectorSchemaRoot.getFieldVectors(); - fieldWriters = Lists.newArrayList(); - for (FieldVector fieldVector : arrowVectors) { - final FieldWriter fieldWriter = - Types.getMinorTypeForArrowType( - fieldVector.getField().getType()).getNewFieldWriter(fieldVector); - fieldWriters.add(fieldWriter); - } - - // Init 
Hive stuffs - vectorizedRowBatch = new VectorizedRowBatch(fieldSize); - for (int i = 0; i < fieldSize; i++) { - final ColumnVector columnVector = createColumnVector(fieldTypeInfos.get(i)); - vectorizedRowBatch.cols[i] = columnVector; - columnVector.init(); - } - vectorizedRowBatch.ensureSize(MAX_BUFFERED_ROWS); - vectorAssignRow = new VectorAssignRow(); - try { - vectorAssignRow.init(rowObjectInspector); - } catch (HiveException e) { - throw new SerDeException(e); - } - } - - private ArrowWrapperWritable serializeBatch() { - for (int i = 0; i < vectorizedRowBatch.projectionSize; i++) { - final int projectedColumn = vectorizedRowBatch.projectedColumns[i]; - final ColumnVector hiveVector = vectorizedRowBatch.cols[projectedColumn]; - final TypeInfo fieldTypeInfo = structTypeInfo.getAllStructFieldTypeInfos().get(i); - final FieldWriter fieldWriter = fieldWriters.get(i); - final FieldVector arrowVector = arrowVectors.get(i); - arrowVector.setValueCount(0); - fieldWriter.setPosition(0); - write(fieldWriter, arrowVector, hiveVector, fieldTypeInfo, 0, batchSize, true); - } - vectorizedRowBatch.reset(); - vectorSchemaRoot.setRowCount(batchSize); - - batchSize = 0; - return new ArrowWrapperWritable(vectorSchemaRoot); - } - - private BaseWriter getWriter(FieldWriter writer, TypeInfo typeInfo, String name) { - switch (typeInfo.getCategory()) { - case PRIMITIVE: - switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) { - case BOOLEAN: - return writer.bit(name); - case BYTE: - return writer.tinyInt(name); - case SHORT: - return writer.smallInt(name); - case INT: - return writer.integer(name); - case LONG: - return writer.bigInt(name); - case FLOAT: - return writer.float4(name); - case DOUBLE: - return writer.float8(name); - case STRING: - case VARCHAR: - case CHAR: - return writer.varChar(name); - case DATE: - return writer.dateDay(name); - case TIMESTAMP: - return writer.timeStampMilli(name); - case BINARY: - return writer.varBinary(name); - case DECIMAL: - final DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo; - final int scale = decimalTypeInfo.scale(); - final int precision = decimalTypeInfo.precision(); - return writer.decimal(name, scale, precision); - case INTERVAL_YEAR_MONTH: - return writer.intervalYear(name); - case INTERVAL_DAY_TIME: - return writer.intervalDay(name); - case TIMESTAMPLOCALTZ: // VectorAssignRow doesn't support it - case VOID: - case UNKNOWN: - default: - throw new IllegalArgumentException(); - } - case LIST: - case UNION: - return writer.list(name); - case STRUCT: - return writer.map(name); - case MAP: // The caller will convert map to array - return writer.list(name).map(); - default: - throw new IllegalArgumentException(); - } - } - - private BaseWriter getWriter(FieldWriter writer, TypeInfo typeInfo) { - switch (typeInfo.getCategory()) { - case PRIMITIVE: - switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) { - case BOOLEAN: - return writer.bit(); - case BYTE: - return writer.tinyInt(); - case SHORT: - return writer.smallInt(); - case INT: - return writer.integer(); - case LONG: - return writer.bigInt(); - case FLOAT: - return writer.float4(); - case DOUBLE: - return writer.float8(); - case STRING: - case VARCHAR: - case CHAR: - return writer.varChar(); - case DATE: - return writer.dateDay(); - case TIMESTAMP: - return writer.timeStampMilli(); - case BINARY: - return writer.varBinary(); - case INTERVAL_YEAR_MONTH: - return writer.intervalDay(); - case INTERVAL_DAY_TIME: - return writer.intervalYear(); - case TIMESTAMPLOCALTZ: // 
VectorAssignRow doesn't support it - case DECIMAL: // ListVector doesn't support it - case VOID: - case UNKNOWN: - default: - throw new IllegalArgumentException(); - } - case LIST: - case UNION: - return writer.list(); - case STRUCT: - return writer.map(); - case MAP: // The caller will convert map to array - return writer.list().map(); - default: - throw new IllegalArgumentException(); - } - } - - private void write(BaseWriter baseWriter, FieldVector arrowVector, ColumnVector hiveVector, - TypeInfo typeInfo, int offset, int length, boolean incrementIndex) { - - final IntConsumer writer; - switch (typeInfo.getCategory()) { - case PRIMITIVE: - final PrimitiveObjectInspector.PrimitiveCategory primitiveCategory = - ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); - switch (primitiveCategory) { - case BOOLEAN: - writer = index -> ((BitWriter) baseWriter).writeBit( - (int) ((LongColumnVector) hiveVector).vector[index]); - break; - case BYTE: - writer = index -> - ((TinyIntWriter) baseWriter).writeTinyInt( - (byte) ((LongColumnVector) hiveVector).vector[index]); - break; - case SHORT: - writer = index -> ((SmallIntWriter) baseWriter).writeSmallInt( - (short) ((LongColumnVector) hiveVector).vector[index]); - break; - case INT: - writer = index -> ((IntWriter) baseWriter).writeInt( - (int) ((LongColumnVector) hiveVector).vector[index]); - break; - case LONG: - writer = index -> ((BigIntWriter) baseWriter).writeBigInt( - ((LongColumnVector) hiveVector).vector[index]); - break; - case FLOAT: - writer = index -> ((Float4Writer) baseWriter).writeFloat4( - (float) ((DoubleColumnVector) hiveVector).vector[index]); - break; - case DOUBLE: - writer = index -> ((Float8Writer) baseWriter).writeFloat8( - ((DoubleColumnVector) hiveVector).vector[index]); - break; - case STRING: - case VARCHAR: - case CHAR: - writer = index -> { - BytesColumnVector stringVector = (BytesColumnVector) hiveVector; - byte[] bytes = stringVector.vector[index]; - int start = stringVector.start[index]; - int bytesLength = stringVector.length[index]; - try (ArrowBuf arrowBuf = rootAllocator.buffer(bytesLength)) { - arrowBuf.setBytes(0, bytes, start, bytesLength); - ((VarCharWriter) baseWriter).writeVarChar(0, bytesLength, arrowBuf); - } - }; - break; - case DATE: - writer = index -> ((DateDayWriter) baseWriter).writeDateDay( - (int) ((LongColumnVector) hiveVector).vector[index]); - break; - case TIMESTAMP: - writer = index -> ((TimeStampMilliWriter) baseWriter).writeTimeStampMilli( - ((TimestampColumnVector) hiveVector).getTime(index)); - break; - case BINARY: - writer = index -> { - BytesColumnVector binaryVector = (BytesColumnVector) hiveVector; - final byte[] bytes = binaryVector.vector[index]; - final int start = binaryVector.start[index]; - final int byteLength = binaryVector.length[index]; - try (ArrowBuf arrowBuf = rootAllocator.buffer(byteLength)) { - arrowBuf.setBytes(0, bytes, start, byteLength); - ((VarBinaryWriter) baseWriter).writeVarBinary(0, byteLength, arrowBuf); - } - }; - break; - case DECIMAL: - writer = index -> { - DecimalColumnVector hiveDecimalVector = (DecimalColumnVector) hiveVector; - ((DecimalWriter) baseWriter).writeDecimal( - hiveDecimalVector.vector[index].getHiveDecimal().bigDecimalValue() - .setScale(hiveDecimalVector.scale)); - }; - break; - case INTERVAL_YEAR_MONTH: - writer = index -> ((IntervalYearWriter) baseWriter).writeIntervalYear( - (int) ((LongColumnVector) hiveVector).vector[index]); - break; - case INTERVAL_DAY_TIME: - writer = index -> { - IntervalDayTimeColumnVector 
intervalDayTimeVector = - (IntervalDayTimeColumnVector) hiveVector; - final long millis = (intervalDayTimeVector.getTotalSeconds(index) * 1_000) + - (intervalDayTimeVector.getNanos(index) / 1_000_000); - final int days = (int) (millis / MS_PER_DAY); - ((IntervalDayWriter) baseWriter).writeIntervalDay( - days, (int) (millis % MS_PER_DAY)); - }; - break; - case VOID: - case UNKNOWN: - case TIMESTAMPLOCALTZ: - default: - throw new IllegalArgumentException(); - } - break; - case LIST: - final ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo; - final TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo(); - final ListColumnVector hiveListVector = (ListColumnVector) hiveVector; - final ColumnVector hiveElementVector = hiveListVector.child; - final FieldVector arrowElementVector = arrowVector.getChildrenFromFields().get(0); - final BaseWriter.ListWriter listWriter = (BaseWriter.ListWriter) baseWriter; - final BaseWriter elementWriter = getWriter((FieldWriter) baseWriter, elementTypeInfo); - - writer = index -> { - final int listOffset = (int) hiveListVector.offsets[index]; - final int listLength = (int) hiveListVector.lengths[index]; - listWriter.startList(); - write(elementWriter, arrowElementVector, hiveElementVector, elementTypeInfo, - listOffset, listLength, false); - listWriter.endList(); - }; - - incrementIndex = false; - break; - case STRUCT: - final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; - final List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); - final StructColumnVector hiveStructVector = (StructColumnVector) hiveVector; - final List arrowFieldVectors = arrowVector.getChildrenFromFields(); - final ColumnVector[] hiveFieldVectors = hiveStructVector.fields; - final BaseWriter.MapWriter structWriter = (BaseWriter.MapWriter) baseWriter; - final int fieldSize = fieldTypeInfos.size(); - - writer = index -> { - structWriter.start(); - for (int fieldIndex = 0; fieldIndex < fieldSize; fieldIndex++) { - final TypeInfo fieldTypeInfo = fieldTypeInfos.get(fieldIndex); - final String fieldName = structTypeInfo.getAllStructFieldNames().get(fieldIndex); - final ColumnVector hiveFieldVector = hiveFieldVectors[fieldIndex]; - final BaseWriter fieldWriter = getWriter((FieldWriter) structWriter, fieldTypeInfo, - fieldName); - final FieldVector arrowFieldVector = arrowFieldVectors.get(fieldIndex); - write(fieldWriter, arrowFieldVector, hiveFieldVector, fieldTypeInfo, index, 1, false); - } - structWriter.end(); - }; - - incrementIndex = false; - break; - case UNION: - final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; - final List objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos(); - final UnionColumnVector hiveUnionVector = (UnionColumnVector) hiveVector; - final ColumnVector[] hiveObjectVectors = hiveUnionVector.fields; - final UnionWriter unionWriter = (UnionWriter) baseWriter; - - writer = index -> { - final int tag = hiveUnionVector.tags[index]; - final ColumnVector hiveObjectVector = hiveObjectVectors[tag]; - final TypeInfo objectTypeInfo = objectTypeInfos.get(tag); - write(unionWriter, arrowVector, hiveObjectVector, objectTypeInfo, index, 1, false); - }; - break; - case MAP: - final ListTypeInfo structListTypeInfo = - toStructListTypeInfo((MapTypeInfo) typeInfo); - final ListColumnVector structListVector = - toStructListVector((MapColumnVector) hiveVector); - - writer = index -> write(baseWriter, arrowVector, structListVector, structListTypeInfo, - index, length, false); - - incrementIndex = false; - break; - default: - throw new 
IllegalArgumentException(); - } - - if (hiveVector.noNulls) { - if (hiveVector.isRepeating) { - for (int i = 0; i < length; i++) { - writer.accept(0); - if (incrementIndex) { - baseWriter.setPosition(baseWriter.getPosition() + 1); - } - } - } else { - if (vectorizedRowBatch.selectedInUse) { - for (int j = 0; j < length; j++) { - final int i = vectorizedRowBatch.selected[j]; - writer.accept(offset + i); - if (incrementIndex) { - baseWriter.setPosition(baseWriter.getPosition() + 1); - } - } - } else { - for (int i = 0; i < length; i++) { - writer.accept(offset + i); - if (incrementIndex) { - baseWriter.setPosition(baseWriter.getPosition() + 1); - } - } - } - } - } else { - if (hiveVector.isRepeating) { - for (int i = 0; i < length; i++) { - if (hiveVector.isNull[0]) { - writeNull(baseWriter); - } else { - writer.accept(0); - } - if (incrementIndex) { - baseWriter.setPosition(baseWriter.getPosition() + 1); - } - } - } else { - if (vectorizedRowBatch.selectedInUse) { - for (int j = 0; j < length; j++) { - final int i = vectorizedRowBatch.selected[j]; - if (hiveVector.isNull[offset + i]) { - writeNull(baseWriter); - } else { - writer.accept(offset + i); - } - if (incrementIndex) { - baseWriter.setPosition(baseWriter.getPosition() + 1); - } - } - } else { - for (int i = 0; i < length; i++) { - if (hiveVector.isNull[offset + i]) { - writeNull(baseWriter); - } else { - writer.accept(offset + i); - } - if (incrementIndex) { - baseWriter.setPosition(baseWriter.getPosition() + 1); - } - } - } - } - } - } - - public ArrowWrapperWritable serialize(Object obj, ObjectInspector objInspector) { - // if row is null, it means there are no more rows (closeOp()). - // another case can be that the buffer is full. - if (obj == null) { - return serializeBatch(); - } - List standardObjects = new ArrayList(); - ObjectInspectorUtils.copyToStandardObject(standardObjects, obj, - ((StructObjectInspector) objInspector), WRITABLE); - - vectorAssignRow.assignRow(vectorizedRowBatch, batchSize, standardObjects, fieldSize); - batchSize++; - if (batchSize == MAX_BUFFERED_ROWS) { - return serializeBatch(); - } - return null; - } - } - - private static void writeNull(BaseWriter baseWriter) { - if (baseWriter instanceof UnionListWriter) { - // UnionListWriter should implement AbstractFieldWriter#writeNull - BaseWriter.ListWriter listWriter = ((UnionListWriter) baseWriter).list(); - listWriter.setPosition(listWriter.getPosition() + 1); - } else { - // FieldWriter should have a super method of AbstractFieldWriter#writeNull - try { - Method method = baseWriter.getClass().getMethod("writeNull"); - method.setAccessible(true); - method.invoke(baseWriter); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - } - - private static abstract class PrimitiveReader { - final void read(FieldReader reader, ColumnVector columnVector, int offset, int length) { - for (int i = 0; i < length; i++) { - final int rowIndex = offset + i; - if (reader.isSet()) { - doRead(reader, columnVector, rowIndex); - } else { - VectorizedBatchUtil.setNullColIsNullValue(columnVector, rowIndex); - } - reader.setPosition(reader.getPosition() + 1); - } - } - - abstract void doRead(FieldReader reader, ColumnVector columnVector, int rowIndex); - } - - private class Deserializer { - private final VectorExtractRow vectorExtractRow; - private final VectorizedRowBatch vectorizedRowBatch; - private Object[][] rows; - - public Deserializer() throws SerDeException { - vectorExtractRow = new VectorExtractRow(); - final List fieldTypeInfoList = 
rowTypeInfo.getAllStructFieldTypeInfos(); - final int fieldCount = fieldTypeInfoList.size(); - final TypeInfo[] typeInfos = fieldTypeInfoList.toArray(new TypeInfo[fieldCount]); - try { - vectorExtractRow.init(typeInfos); - } catch (HiveException e) { - throw new SerDeException(e); - } - - vectorizedRowBatch = new VectorizedRowBatch(fieldCount); - for (int i = 0; i < fieldCount; i++) { - final ColumnVector columnVector = createColumnVector(typeInfos[i]); - columnVector.init(); - vectorizedRowBatch.cols[i] = columnVector; - } - } - - public Object deserialize(Writable writable) { - final ArrowWrapperWritable arrowWrapperWritable = (ArrowWrapperWritable) writable; - final VectorSchemaRoot vectorSchemaRoot = arrowWrapperWritable.getVectorSchemaRoot(); - final List fieldVectors = vectorSchemaRoot.getFieldVectors(); - final int fieldCount = fieldVectors.size(); - final int rowCount = vectorSchemaRoot.getRowCount(); - vectorizedRowBatch.ensureSize(rowCount); - - if (rows == null || rows.length < rowCount ) { - rows = new Object[rowCount][]; - for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) { - rows[rowIndex] = new Object[fieldCount]; - } - } - - for (int i = 0; i < fieldCount; i++) { - final FieldVector fieldVector = fieldVectors.get(i); - final FieldReader fieldReader = fieldVector.getReader(); - fieldReader.setPosition(0); - final int projectedCol = vectorizedRowBatch.projectedColumns[i]; - final ColumnVector columnVector = vectorizedRowBatch.cols[projectedCol]; - final TypeInfo typeInfo = rowTypeInfo.getAllStructFieldTypeInfos().get(i); - read(fieldReader, columnVector, typeInfo, 0, rowCount); - } - for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) { - vectorExtractRow.extractRow(vectorizedRowBatch, rowIndex, rows[rowIndex]); - } - vectorizedRowBatch.reset(); - return rows; - } - - private void read(FieldReader reader, ColumnVector columnVector, TypeInfo typeInfo, - int rowOffset, int rowLength) { - switch (typeInfo.getCategory()) { - case PRIMITIVE: - final PrimitiveObjectInspector.PrimitiveCategory primitiveCategory = - ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); - final PrimitiveReader primitiveReader; - switch (primitiveCategory) { - case BOOLEAN: - primitiveReader = new PrimitiveReader() { - NullableBitHolder holder = new NullableBitHolder(); - - @Override - void doRead(FieldReader reader, ColumnVector columnVector, int rowIndex) { - reader.read(holder); - ((LongColumnVector) columnVector).vector[rowIndex] = holder.value; - } - }; - break; - case BYTE: - primitiveReader = new PrimitiveReader() { - NullableTinyIntHolder holder = new NullableTinyIntHolder(); - - @Override - void doRead(FieldReader reader, ColumnVector columnVector, int rowIndex) { - reader.read(holder); - ((LongColumnVector) columnVector).vector[rowIndex] = holder.value; - } - }; - break; - case SHORT: - primitiveReader = new PrimitiveReader() { - NullableSmallIntHolder holder = new NullableSmallIntHolder(); - - @Override - void doRead(FieldReader reader, ColumnVector columnVector, int rowIndex) { - reader.read(holder); - ((LongColumnVector) columnVector).vector[rowIndex] = holder.value; - } - }; - break; - case INT: - primitiveReader = new PrimitiveReader() { - NullableIntHolder holder = new NullableIntHolder(); - - @Override - void doRead(FieldReader reader, ColumnVector columnVector, int rowIndex) { - reader.read(holder); - ((LongColumnVector) columnVector).vector[rowIndex] = holder.value; - } - }; - break; - case LONG: - primitiveReader = new PrimitiveReader() { - NullableBigIntHolder holder = 
new NullableBigIntHolder(); - - @Override - void doRead(FieldReader reader, ColumnVector columnVector, int rowIndex) { - reader.read(holder); - ((LongColumnVector) columnVector).vector[rowIndex] = holder.value; - } - }; - break; - case FLOAT: - primitiveReader = new PrimitiveReader() { - NullableFloat4Holder holder = new NullableFloat4Holder(); - - @Override - void doRead(FieldReader reader, ColumnVector columnVector, int rowIndex) { - reader.read(holder); - ((DoubleColumnVector) columnVector).vector[rowIndex] = holder.value; - } - }; - break; - case DOUBLE: - primitiveReader = new PrimitiveReader() { - NullableFloat8Holder holder = new NullableFloat8Holder(); - - @Override - void doRead(FieldReader reader, ColumnVector columnVector, int rowIndex) { - reader.read(holder); - ((DoubleColumnVector) columnVector).vector[rowIndex] = holder.value; - } - }; - break; - case STRING: - case VARCHAR: - case CHAR: - primitiveReader = new PrimitiveReader() { - NullableVarCharHolder holder = new NullableVarCharHolder(); - - @Override - void doRead(FieldReader reader, ColumnVector columnVector, int rowIndex) { - reader.read(holder); - int varCharSize = holder.end - holder.start; - byte[] varCharBytes = new byte[varCharSize]; - holder.buffer.getBytes(holder.start, varCharBytes); - ((BytesColumnVector) columnVector).setVal(rowIndex, varCharBytes, 0, varCharSize); - } - }; - break; - case DATE: - primitiveReader = new PrimitiveReader() { - NullableDateDayHolder holder = new NullableDateDayHolder(); - - @Override - void doRead(FieldReader reader, ColumnVector columnVector, int rowIndex) { - reader.read(holder); - ((LongColumnVector) columnVector).vector[rowIndex] = holder.value; - } - }; - break; - case TIMESTAMP: - primitiveReader = new PrimitiveReader() { - NullableTimeStampMilliHolder timeStampMilliHolder = - new NullableTimeStampMilliHolder(); - - @Override - void doRead(FieldReader reader, ColumnVector columnVector, int rowIndex) { - reader.read(timeStampMilliHolder); - ((TimestampColumnVector) columnVector).set(rowIndex, - new Timestamp(timeStampMilliHolder.value)); - } - }; - break; - case BINARY: - primitiveReader = new PrimitiveReader() { - NullableVarBinaryHolder holder = new NullableVarBinaryHolder(); - - @Override - void doRead(FieldReader reader, ColumnVector columnVector, int rowIndex) { - reader.read(holder); - final int binarySize = holder.end - holder.start; - final byte[] binaryBytes = new byte[binarySize]; - holder.buffer.getBytes(holder.start, binaryBytes); - ((BytesColumnVector) columnVector).setVal(rowIndex, binaryBytes, 0, binarySize); - } - }; - break; - case DECIMAL: - primitiveReader = new PrimitiveReader() { - @Override - void doRead(FieldReader reader, ColumnVector columnVector, int rowIndex) { - ((DecimalColumnVector) columnVector).set(rowIndex, - HiveDecimal.create(reader.readBigDecimal())); - } - }; - break; - case INTERVAL_YEAR_MONTH: - primitiveReader = new PrimitiveReader() { - NullableIntervalYearHolder holder = new NullableIntervalYearHolder(); - - @Override - void doRead(FieldReader reader, ColumnVector columnVector, int rowIndex) { - reader.read(holder); - ((LongColumnVector) columnVector).vector[rowIndex] = holder.value; - } - }; - break; - case INTERVAL_DAY_TIME: - primitiveReader = new PrimitiveReader() { - NullableIntervalDayHolder holder = new NullableIntervalDayHolder(); - - @Override - void doRead(FieldReader reader, ColumnVector columnVector, int rowIndex) { - IntervalDayTimeColumnVector intervalDayTimeVector = - (IntervalDayTimeColumnVector) columnVector; - 
reader.read(holder); - HiveIntervalDayTime intervalDayTime = new HiveIntervalDayTime( - holder.days, // days - holder.milliseconds / MS_PER_HOUR, // hour - (holder.milliseconds % MS_PER_HOUR) / MS_PER_MINUTE, // minute - (holder.milliseconds % MS_PER_MINUTE) / MS_PER_SECOND, // second - (holder.milliseconds % MS_PER_SECOND) * NS_PER_MS); // nanosecond - intervalDayTimeVector.set(rowIndex, intervalDayTime); - } - }; - break; - default: - throw new IllegalArgumentException(); - } - primitiveReader.read(reader, columnVector, rowOffset, rowLength); - break; - case LIST: - final ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo; - final TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo(); - final ListColumnVector listVector = (ListColumnVector) columnVector; - final ColumnVector elementVector = listVector.child; - final FieldReader elementReader = reader.reader(); - - int listOffset = 0; - for (int rowIndex = 0; rowIndex < rowLength; rowIndex++) { - final int adjustedRowIndex = rowOffset + rowIndex; - reader.setPosition(adjustedRowIndex); - final int listLength = reader.size(); - listVector.offsets[adjustedRowIndex] = listOffset; - listVector.lengths[adjustedRowIndex] = listLength; - read(elementReader, elementVector, elementTypeInfo, listOffset, listLength); - listOffset += listLength; - } - break; - case STRUCT: - final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; - final List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); - final List fieldNames = structTypeInfo.getAllStructFieldNames(); - final int fieldSize = fieldNames.size(); - final StructColumnVector structVector = (StructColumnVector) columnVector; - final ColumnVector[] fieldVectors = structVector.fields; - - for (int fieldIndex = 0; fieldIndex < fieldSize; fieldIndex++) { - final TypeInfo fieldTypeInfo = fieldTypeInfos.get(fieldIndex); - final FieldReader fieldReader = reader.reader(fieldNames.get(fieldIndex)); - final ColumnVector fieldVector = fieldVectors[fieldIndex]; - read(fieldReader, fieldVector, fieldTypeInfo, rowOffset, rowLength); - } - break; - case UNION: - final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; - final List objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos(); - final UnionColumnVector unionVector = (UnionColumnVector) columnVector; - final ColumnVector[] objectVectors = unionVector.fields; - final Map minorTypeToTagMap = Maps.newHashMap(); - for (int tag = 0; tag < objectTypeInfos.size(); tag++) { - minorTypeToTagMap.put(toMinorType(objectTypeInfos.get(tag)), tag); - } - - final UnionReader unionReader = (UnionReader) reader; - for (int rowIndex = 0; rowIndex < rowLength; rowIndex++) { - final int adjustedRowIndex = rowIndex + rowOffset; - unionReader.setPosition(adjustedRowIndex); - final Types.MinorType minorType = unionReader.getMinorType(); - final int tag = minorTypeToTagMap.get(minorType); - unionVector.tags[adjustedRowIndex] = tag; - read(unionReader, objectVectors[tag], objectTypeInfos.get(tag), adjustedRowIndex, 1); - } - break; - case MAP: - final MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; - final ListTypeInfo mapStructListTypeInfo = toStructListTypeInfo(mapTypeInfo); - final MapColumnVector hiveMapVector = (MapColumnVector) columnVector; - final ListColumnVector mapStructListVector = toStructListVector(hiveMapVector); - final StructColumnVector mapStructVector = (StructColumnVector) mapStructListVector.child; - read(reader, mapStructListVector, mapStructListTypeInfo, rowOffset, rowLength); - - hiveMapVector.isRepeating = 
mapStructListVector.isRepeating; - hiveMapVector.childCount = mapStructListVector.childCount; - hiveMapVector.noNulls = mapStructListVector.noNulls; - System.arraycopy(mapStructListVector.offsets, 0, hiveMapVector.offsets, 0, rowLength); - System.arraycopy(mapStructListVector.lengths, 0, hiveMapVector.lengths, 0, rowLength); - hiveMapVector.keys = mapStructVector.fields[0]; - hiveMapVector.values = mapStructVector.fields[1]; - break; - default: - throw new IllegalArgumentException(); - } - } - } - - private static Types.MinorType toMinorType(TypeInfo typeInfo) { - switch (typeInfo.getCategory()) { - case PRIMITIVE: - switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) { - case BOOLEAN: - return Types.MinorType.BIT; - case BYTE: - return Types.MinorType.TINYINT; - case SHORT: - return Types.MinorType.SMALLINT; - case INT: - return Types.MinorType.INT; - case LONG: - return Types.MinorType.BIGINT; - case FLOAT: - return Types.MinorType.FLOAT4; - case DOUBLE: - return Types.MinorType.FLOAT8; - case STRING: - case VARCHAR: - case CHAR: - return Types.MinorType.VARCHAR; - case DATE: - return Types.MinorType.DATEDAY; - case TIMESTAMP: - return Types.MinorType.TIMESTAMPMILLI; - case BINARY: - return Types.MinorType.VARBINARY; - case DECIMAL: - return Types.MinorType.DECIMAL; - case INTERVAL_YEAR_MONTH: - return Types.MinorType.INTERVALYEAR; - case INTERVAL_DAY_TIME: - return Types.MinorType.INTERVALDAY; - case VOID: - case TIMESTAMPLOCALTZ: - case UNKNOWN: - default: - throw new IllegalArgumentException(); - } - case LIST: - return Types.MinorType.LIST; - case STRUCT: - return Types.MinorType.MAP; - case UNION: - return Types.MinorType.UNION; - case MAP: - // Apache Arrow doesn't have a map vector, so it's converted to a list vector of a struct - // vector. 
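// The conversion described in the comment above can be sketched with the same Arrow field API
// this class uses in toField(); the layout below is what a hypothetical Hive column of type
// map<string,int> would surface as, reusing the "keys"/"values"/"[DEFAULT]" names seen here.
import com.google.common.collect.Lists;

import org.apache.arrow.vector.types.Types;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;

final class MapFieldSketch {
  static Field mapOfStringToInt(String name) {
    final Field keys = Field.nullable("keys", Types.MinorType.VARCHAR.getType());
    final Field values = Field.nullable("values", Types.MinorType.INT.getType());
    // Arrow has no map vector in this version, so the map element is a struct of key and value...
    final Field element = new Field("[DEFAULT]", FieldType.nullable(Types.MinorType.MAP.getType()),
        Lists.newArrayList(keys, values));
    // ...wrapped in a list that carries one struct entry per map entry.
    return new Field(name, FieldType.nullable(Types.MinorType.LIST.getType()),
        Lists.newArrayList(element));
  }
}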
- return Types.MinorType.LIST; - default: - throw new IllegalArgumentException(); - } - } - - private static ListTypeInfo toStructListTypeInfo(MapTypeInfo mapTypeInfo) { - final StructTypeInfo structTypeInfo = new StructTypeInfo(); - structTypeInfo.setAllStructFieldNames(Lists.newArrayList("keys", "values")); - structTypeInfo.setAllStructFieldTypeInfos(Lists.newArrayList( - mapTypeInfo.getMapKeyTypeInfo(), mapTypeInfo.getMapValueTypeInfo())); - final ListTypeInfo structListTypeInfo = new ListTypeInfo(); - structListTypeInfo.setListElementTypeInfo(structTypeInfo); - return structListTypeInfo; - } - - private static Field toField(String name, TypeInfo typeInfo) { - switch (typeInfo.getCategory()) { - case PRIMITIVE: - final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; - switch (primitiveTypeInfo.getPrimitiveCategory()) { - case BOOLEAN: - return Field.nullable(name, Types.MinorType.BIT.getType()); - case BYTE: - return Field.nullable(name, Types.MinorType.TINYINT.getType()); - case SHORT: - return Field.nullable(name, Types.MinorType.SMALLINT.getType()); - case INT: - return Field.nullable(name, Types.MinorType.INT.getType()); - case LONG: - return Field.nullable(name, Types.MinorType.BIGINT.getType()); - case FLOAT: - return Field.nullable(name, Types.MinorType.FLOAT4.getType()); - case DOUBLE: - return Field.nullable(name, Types.MinorType.FLOAT8.getType()); - case STRING: - return Field.nullable(name, Types.MinorType.VARCHAR.getType()); - case DATE: - return Field.nullable(name, Types.MinorType.DATEDAY.getType()); - case TIMESTAMP: - return Field.nullable(name, Types.MinorType.TIMESTAMPMILLI.getType()); - case TIMESTAMPLOCALTZ: - final TimestampLocalTZTypeInfo timestampLocalTZTypeInfo = - (TimestampLocalTZTypeInfo) typeInfo; - final String timeZone = timestampLocalTZTypeInfo.getTimeZone().toString(); - return Field.nullable(name, new ArrowType.Timestamp(TimeUnit.MILLISECOND, timeZone)); - case BINARY: - return Field.nullable(name, Types.MinorType.VARBINARY.getType()); - case DECIMAL: - final DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo; - final int precision = decimalTypeInfo.precision(); - final int scale = decimalTypeInfo.scale(); - return Field.nullable(name, new ArrowType.Decimal(precision, scale)); - case VARCHAR: - return Field.nullable(name, Types.MinorType.VARCHAR.getType()); - case CHAR: - return Field.nullable(name, Types.MinorType.VARCHAR.getType()); - case INTERVAL_YEAR_MONTH: - return Field.nullable(name, Types.MinorType.INTERVALYEAR.getType()); - case INTERVAL_DAY_TIME: - return Field.nullable(name, Types.MinorType.INTERVALDAY.getType()); - default: - throw new IllegalArgumentException(); - } - case LIST: - final ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo; - final TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo(); - return new Field(name, FieldType.nullable(Types.MinorType.LIST.getType()), - Lists.newArrayList(toField(DEFAULT_ARROW_FIELD_NAME, elementTypeInfo))); - case STRUCT: - final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; - final List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); - final List fieldNames = structTypeInfo.getAllStructFieldNames(); - final List structFields = Lists.newArrayList(); - final int structSize = fieldNames.size(); - for (int i = 0; i < structSize; i++) { - structFields.add(toField(fieldNames.get(i), fieldTypeInfos.get(i))); - } - return new Field(name, FieldType.nullable(Types.MinorType.MAP.getType()), structFields); - case UNION: - final UnionTypeInfo 
unionTypeInfo = (UnionTypeInfo) typeInfo; - final List objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos(); - final List unionFields = Lists.newArrayList(); - final int unionSize = unionFields.size(); - for (int i = 0; i < unionSize; i++) { - unionFields.add(toField(DEFAULT_ARROW_FIELD_NAME, objectTypeInfos.get(i))); - } - return new Field(name, FieldType.nullable(Types.MinorType.UNION.getType()), unionFields); - case MAP: - final MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; - final TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo(); - final TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo(); - - final StructTypeInfo mapStructTypeInfo = new StructTypeInfo(); - mapStructTypeInfo.setAllStructFieldNames(Lists.newArrayList("keys", "values")); - mapStructTypeInfo.setAllStructFieldTypeInfos( - Lists.newArrayList(keyTypeInfo, valueTypeInfo)); - - final ListTypeInfo mapListStructTypeInfo = new ListTypeInfo(); - mapListStructTypeInfo.setListElementTypeInfo(mapStructTypeInfo); - - return toField(name, mapListStructTypeInfo); - default: - throw new IllegalArgumentException(); - } - } - - private static ListColumnVector toStructListVector(MapColumnVector mapVector) { - final StructColumnVector structVector; - final ListColumnVector structListVector; - structVector = new StructColumnVector(); - structVector.fields = new ColumnVector[] {mapVector.keys, mapVector.values}; - structListVector = new ListColumnVector(); - structListVector.child = structVector; - System.arraycopy(mapVector.offsets, 0, structListVector.offsets, 0, mapVector.childCount); - System.arraycopy(mapVector.lengths, 0, structListVector.lengths, 0, mapVector.childCount); - structListVector.childCount = mapVector.childCount; - structListVector.isRepeating = mapVector.isRepeating; - structListVector.noNulls = mapVector.noNulls; - return structListVector; - } - - @Override - public Class getSerializedClass() { - return ArrowWrapperWritable.class; - } - - @Override - public ArrowWrapperWritable serialize(Object obj, ObjectInspector objInspector) { - return serializer.serialize(obj, objInspector); - } - - @Override - public SerDeStats getSerDeStats() { - return null; - } - - @Override - public Object deserialize(Writable writable) { - return deserializer.deserialize(writable); - } - - @Override - public ObjectInspector getObjectInspector() { - return rowObjectInspector; - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowWrapperWritable.java ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowWrapperWritable.java deleted file mode 100644 index df7b53f42a..0000000000 --- ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowWrapperWritable.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-package org.apache.hadoop.hive.ql.io.arrow;
-
-import org.apache.arrow.vector.VectorSchemaRoot;
-import org.apache.hadoop.io.Writable;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-public class ArrowWrapperWritable implements Writable {
-  private VectorSchemaRoot vectorSchemaRoot;
-
-  public ArrowWrapperWritable(VectorSchemaRoot vectorSchemaRoot) {
-    this.vectorSchemaRoot = vectorSchemaRoot;
-  }
-
-  public VectorSchemaRoot getVectorSchemaRoot() {
-    return vectorSchemaRoot;
-  }
-
-  @Override
-  public void write(DataOutput dataOutput) throws IOException {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public void readFields(DataInput dataInput) throws IOException {
-    throw new UnsupportedOperationException();
-  }
-}
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/arrow/RootAllocatorFactory.java ql/src/java/org/apache/hadoop/hive/ql/io/arrow/RootAllocatorFactory.java
deleted file mode 100644
index 78cc188e65..0000000000
--- ql/src/java/org/apache/hadoop/hive/ql/io/arrow/RootAllocatorFactory.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.arrow;
-
-import org.apache.arrow.memory.RootAllocator;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.conf.HiveConf;
-
-import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ARROW_ROOT_ALLOCATOR_LIMIT;
-
-/**
- * Thread-safe singleton factory for RootAllocator
- */
-public enum RootAllocatorFactory {
-  INSTANCE;
-
-  private RootAllocator rootAllocator;
-
-  RootAllocatorFactory() {
-  }
-
-  public synchronized RootAllocator getRootAllocator(Configuration conf) {
-    if (rootAllocator == null) {
-      final long limit = HiveConf.getLongVar(conf, HIVE_ARROW_ROOT_ALLOCATOR_LIMIT);
-      rootAllocator = new RootAllocator(limit);
-    }
-    return rootAllocator;
-  }
-}
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
index 348f9df773..1d7eceb1ef 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
@@ -435,12 +435,12 @@ public void readEncodedColumns(int stripeIx, StripeInformation stripe,
         try {
           if (RecordReaderUtils.isDictionary(sctx.kind, ctx.encoding) || index == null) {
             // This stream is for entire stripe and needed for every RG; uncompress once and reuse.
+            if (isTracingEnabled) {
+              LOG.trace("Getting stripe-level stream [" + sctx.kind + ", " + ctx.encoding + "] for"
+                  + " column " + ctx.colIx + " RG " + rgIx + " at " + sctx.offset + ", " + sctx.length);
+            }
+            trace.logStartStripeStream(sctx.kind);
            if (sctx.stripeLevelStream == null) {
-              if (isTracingEnabled) {
-                LOG.trace("Getting stripe-level stream [" + sctx.kind + ", " + ctx.encoding + "] for"
-                    + " column " + ctx.colIx + " RG " + rgIx + " at " + sctx.offset + ", " + sctx.length);
-              }
-              trace.logStartStripeStream(sctx.kind);
              sctx.stripeLevelStream = POOLS.csdPool.take();
              // We will be using this for each RG while also sending RGs to processing.
              // To avoid buffers being unlocked, run refcount one ahead; so each RG
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
index 646b214249..42532f9a0e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
@@ -31,7 +31,6 @@
 import org.apache.orc.CompressionCodec;
 import org.apache.orc.TypeDescription;
 import org.apache.orc.TypeDescription.Category;
-import org.apache.orc.impl.InStream;
 import org.apache.orc.impl.PositionProvider;
 import org.apache.orc.impl.SettableUncompressedStream;
 import org.apache.orc.impl.TreeReaderFactory;
@@ -214,11 +213,6 @@ public static StreamReaderBuilder builder() {
     }
   }
 
-  private static void skipCompressedIndex(boolean isCompressed, PositionProvider index) {
-    if (!isCompressed) return;
-    index.getNext();
-  }
-
   protected static class StringStreamReader extends StringTreeReader
       implements SettableTreeReader {
     private boolean _isFileCompressed;
@@ -266,30 +260,30 @@ public void seek(PositionProvider index) throws IOException {
         // data stream could be empty stream or already reached end of stream before present stream.
         // This can happen if all values in stream are nulls or last row group values are all null.
-        skipCompressedIndex(_isFileCompressed, index);
         if (_dataStream != null && _dataStream.available() > 0) {
+          if (_isFileCompressed) {
+            index.getNext();
+          }
           ((StringDictionaryTreeReader) reader).getReader().seek(index);
-        } // No need to skip seek here, index won't be used anymore.
+        }
       } else { // DIRECT encoding
         // data stream could be empty stream or already reached end of stream before present stream.
         // This can happen if all values in stream are nulls or last row group values are all null.
-        skipCompressedIndex(_isFileCompressed, index);
-        // TODO: why does the original code not just use _dataStream that it passes in as stream?
-        InStream stream = ((StringDirectTreeReader) reader).getStream();
-        // TODO: not clear why this check and skipSeek are needed.
         if (_dataStream != null && _dataStream.available() > 0) {
-          stream.seek(index);
-        } else {
-          assert stream == _dataStream;
-          skipSeek(index);
+          if (_isFileCompressed) {
+            index.getNext();
+          }
+          ((StringDirectTreeReader) reader).getStream().seek(index);
         }
-        skipCompressedIndex(_isFileCompressed, index);
         if (_lengthStream != null && _lengthStream.available() > 0) {
+          if (_isFileCompressed) {
+            index.getNext();
+          }
           ((StringDirectTreeReader) reader).getLengths().seek(index);
-        } // No need to skip seek here, index won't be used anymore.
+        }
       }
     }
@@ -836,8 +830,10 @@ public void seek(PositionProvider index) throws IOException {
       // data stream could be empty stream or already reached end of stream before present stream.
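The EncodedTreeReaderFactory hunks here all restore the same inline guard that the removed skipCompressedIndex() and skipSeek() helpers had factored out: seek a stream only when it actually has data, and for compressed files consume one extra position entry first. A condensed sketch of that pattern, with a hypothetical Seekable interface standing in for the various readers and streams that get seeked:

import java.io.IOException;

import org.apache.orc.impl.PositionProvider;
import org.apache.orc.impl.SettableUncompressedStream;

final class SeekGuard {
  /** Hypothetical stand-in for the data/length/dictionary readers and streams seeked above. */
  interface Seekable {
    void seek(PositionProvider index) throws IOException;
  }

  static void seekIfAvailable(SettableUncompressedStream stream, Seekable target,
      PositionProvider index, boolean isFileCompressed) throws IOException {
    // Empty streams (e.g. all values null) are skipped entirely, index entries included.
    if (stream != null && stream.available() > 0) {
      if (isFileCompressed) {
        // A compressed file records an extra leading position entry that the already
        // uncompressed LLAP stream does not need, so it is consumed and discarded here.
        index.getNext();
      }
      target.seek(index);
    }
  }
}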
// This can happen if all values in stream are nulls or last row group values are all null. - skipCompressedIndex(_isFileCompressed, index); if (_dataStream.available() > 0) { + if (_isFileCompressed) { + index.getNext(); + } stream.seek(index); } } @@ -949,8 +945,10 @@ public void seek(PositionProvider index) throws IOException { // data stream could be empty stream or already reached end of stream before present stream. // This can happen if all values in stream are nulls or last row group values are all null. - skipCompressedIndex(_isFileCompressed, index); if (_dataStream.available() > 0) { + if (_isFileCompressed) { + index.getNext(); + } stream.seek(index); } } @@ -1073,19 +1071,19 @@ public void seek(PositionProvider index) throws IOException { // data stream could be empty stream or already reached end of stream before present stream. // This can happen if all values in stream are nulls or last row group values are all null. - skipCompressedIndex(_isFileCompressed, index); - // TODO: not clear why this check and skipSeek are needed. if (_valueStream.available() > 0) { + if (_isFileCompressed) { + index.getNext(); + } valueStream.seek(index); - } else { - assert valueStream == _valueStream; - skipSeek(index); } - skipCompressedIndex(_isFileCompressed, index); if (_scaleStream.available() > 0) { + if (_isFileCompressed) { + index.getNext(); + } scaleReader.seek(index); - } // No need to skip seek here, index won't be used anymore. + } } @Override @@ -1377,29 +1375,30 @@ public void seek(PositionProvider index) throws IOException { // data stream could be empty stream or already reached end of stream before present stream. // This can happen if all values in stream are nulls or last row group values are all null. - skipCompressedIndex(_isFileCompressed, index); if (_dataStream.available() > 0) { + if (_isFileCompressed) { + index.getNext(); + } ((StringDictionaryTreeReader) reader).getReader().seek(index); - } // No need to skip seek here, index won't be used anymore. + } } else { // DIRECT encoding // data stream could be empty stream or already reached end of stream before present stream. // This can happen if all values in stream are nulls or last row group values are all null. - skipCompressedIndex(_isFileCompressed, index); - InStream stream = ((StringDirectTreeReader) reader).getStream(); - // TODO: not clear why this check and skipSeek are needed. if (_dataStream.available() > 0) { - stream.seek(index); - } else { - assert stream == _dataStream; - skipSeek(index); + if (_isFileCompressed) { + index.getNext(); + } + ((StringDirectTreeReader) reader).getStream().seek(index); } - skipCompressedIndex(_isFileCompressed, index); if (_lengthStream.available() > 0) { + if (_isFileCompressed) { + index.getNext(); + } ((StringDirectTreeReader) reader).getLengths().seek(index); - } // No need to skip seek here, index won't be used anymore. + } } } @@ -1575,29 +1574,30 @@ public void seek(PositionProvider index) throws IOException { // data stream could be empty stream or already reached end of stream before present stream. // This can happen if all values in stream are nulls or last row group values are all null. - skipCompressedIndex(_isFileCompressed, index); if (_dataStream.available() > 0) { + if (_isFileCompressed) { + index.getNext(); + } ((StringDictionaryTreeReader) reader).getReader().seek(index); - } // No need to skip seek here, index won't be used anymore. 
+ } } else { // DIRECT encoding // data stream could be empty stream or already reached end of stream before present stream. // This can happen if all values in stream are nulls or last row group values are all null. - skipCompressedIndex(_isFileCompressed, index); - InStream stream = ((StringDirectTreeReader) reader).getStream(); - // TODO: not clear why this check and skipSeek are needed. if (_dataStream.available() > 0) { - stream.seek(index); - } else { - assert stream == _dataStream; - skipSeek(index); + if (_isFileCompressed) { + index.getNext(); + } + ((StringDirectTreeReader) reader).getStream().seek(index); } - skipCompressedIndex(_isFileCompressed, index); if (_lengthStream.available() > 0) { + if (_isFileCompressed) { + index.getNext(); + } ((StringDirectTreeReader) reader).getLengths().seek(index); - } // No need to skip seek here, index won't be used anymore. + } } } @@ -1885,19 +1885,19 @@ public void seek(PositionProvider index) throws IOException { // data stream could be empty stream or already reached end of stream before present stream. // This can happen if all values in stream are nulls or last row group values are all null. - skipCompressedIndex(_isFileCompressed, index); - // TODO: not clear why this check and skipSeek are needed. if (_dataStream.available() > 0) { + if (_isFileCompressed) { + index.getNext(); + } stream.seek(index); - } else { - assert stream == _dataStream; - skipSeek(index); } - skipCompressedIndex(_isFileCompressed, index); if (lengths != null && _lengthsStream.available() > 0) { + if (_isFileCompressed) { + index.getNext(); + } lengths.seek(index); - } // No need to skip seek here, index won't be used anymore. + } } @Override @@ -2132,12 +2132,6 @@ public static StructTreeReader createRootTreeReader(TypeDescription[] batchSchem } - private static void skipSeek(PositionProvider index) { - // Must be consistent with uncompressed stream seek in ORC. See call site comments. 
- index.getNext(); - } - - private static TreeReader createEncodedTreeReader(TypeDescription schema, List encodings, OrcEncodedColumnBatch batch, CompressionCodec codec, TreeReaderFactory.Context context) throws IOException { diff --git ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java index 248632127a..4fd1d4ec54 100644 --- ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java +++ ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java @@ -18,39 +18,48 @@ Licensed to the Apache Software Foundation (ASF) under one package org.apache.hadoop.hive.ql.lockmgr; import com.google.common.annotations.VisibleForTesting; - -import org.apache.curator.shaded.com.google.common.collect.Lists; +import com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.ql.io.AcidUtils; -import org.apache.hadoop.hive.ql.plan.HiveOperation; -import org.apache.hadoop.hive.ql.plan.LockDatabaseDesc; -import org.apache.hadoop.hive.ql.plan.LockTableDesc; -import org.apache.hadoop.hive.ql.plan.UnlockDatabaseDesc; -import org.apache.hadoop.hive.ql.plan.UnlockTableDesc; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hive.common.util.ShutdownHookManager; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.JavaUtils; -import org.apache.hadoop.hive.common.ValidCompactorWriteIdList; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidTxnWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.LockComponentBuilder; import org.apache.hadoop.hive.metastore.LockRequestBuilder; -import org.apache.hadoop.hive.metastore.api.*; +import org.apache.hadoop.hive.metastore.api.DataOperationType; +import org.apache.hadoop.hive.metastore.api.LockComponent; +import org.apache.hadoop.hive.metastore.api.LockResponse; +import org.apache.hadoop.hive.metastore.api.LockState; +import org.apache.hadoop.hive.metastore.api.LockType; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchLockException; +import org.apache.hadoop.hive.metastore.api.NoSuchTxnException; +import org.apache.hadoop.hive.metastore.api.TxnAbortedException; +import org.apache.hadoop.hive.metastore.api.TxnToWriteId; import org.apache.hadoop.hive.metastore.txn.TxnUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryPlan; import org.apache.hadoop.hive.ql.hooks.Entity; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.plan.HiveOperation; +import org.apache.hadoop.hive.ql.plan.LockDatabaseDesc; +import org.apache.hadoop.hive.ql.plan.LockTableDesc; +import org.apache.hadoop.hive.ql.plan.UnlockDatabaseDesc; +import org.apache.hadoop.hive.ql.plan.UnlockTableDesc; +import org.apache.hadoop.security.UserGroupInformation; +import 
org.apache.hive.common.util.ShutdownHookManager; import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.security.PrivilegedExceptionAction; @@ -503,60 +512,80 @@ LockState acquireLocks(QueryPlan plan, Context ctx, String username, boolean isB /* base this on HiveOperation instead? this and DDL_NO_LOCK is peppered all over the code... Seems much cleaner if each stmt is identified as a particular HiveOperation (which I'd think makes sense everywhere). This however would be problematic for merge...*/ - case DDL_EXCLUSIVE: + case DDL_EXCLUSIVE: + compBuilder.setExclusive(); + compBuilder.setOperationType(DataOperationType.NO_TXN); + break; + case INSERT_OVERWRITE: + t = getTable(output); + if (AcidUtils.isTransactionalTable(t)) { + if (conf.getBoolVar(HiveConf.ConfVars.TXN_OVERWRITE_X_LOCK)) { + compBuilder.setExclusive(); + } else { + compBuilder.setSemiShared(); + } + compBuilder.setOperationType(DataOperationType.UPDATE); + } else { compBuilder.setExclusive(); compBuilder.setOperationType(DataOperationType.NO_TXN); - break; - case INSERT_OVERWRITE: - t = getTable(output); - if (AcidUtils.isTransactionalTable(t)) { - if(conf.getBoolVar(HiveConf.ConfVars.TXN_OVERWRITE_X_LOCK)) { - compBuilder.setExclusive(); - } else { - compBuilder.setSemiShared(); - } - compBuilder.setOperationType(DataOperationType.UPDATE); - } else { + } + break; + case INSERT: + assert t != null; + if (AcidUtils.isTransactionalTable(t)) { + compBuilder.setShared(); + } else if (MetaStoreUtils.isNonNativeTable(t.getTTable())) { + final HiveStorageHandler storageHandler = Preconditions.checkNotNull(t.getStorageHandler(), + "Thought all the non native tables have an instance of storage handler" + ); + LockType lockType = storageHandler.getLockType(output); + switch (lockType) { + case EXCLUSIVE: compBuilder.setExclusive(); - compBuilder.setOperationType(DataOperationType.NO_TXN); - } - break; - case INSERT: - assert t != null; - if(AcidUtils.isTransactionalTable(t)) { + break; + case SHARED_READ: compBuilder.setShared(); + break; + case SHARED_WRITE: + compBuilder.setSemiShared(); + break; + default: + throw new IllegalArgumentException(String + .format("Lock type [%s] for Database.Table [%s.%s] is unknown", lockType, t.getDbName(), + t.getTableName() + )); } - else { - if (conf.getBoolVar(HiveConf.ConfVars.HIVE_TXN_STRICT_LOCKING_MODE)) { - compBuilder.setExclusive(); - } else { // this is backward compatible for non-ACID resources, w/o ACID semantics - compBuilder.setShared(); - } + + } else { + if (conf.getBoolVar(HiveConf.ConfVars.HIVE_TXN_STRICT_LOCKING_MODE)) { + compBuilder.setExclusive(); + } else { // this is backward compatible for non-ACID resources, w/o ACID semantics + compBuilder.setShared(); } - compBuilder.setOperationType(DataOperationType.INSERT); - break; - case DDL_SHARED: - compBuilder.setShared(); - compBuilder.setOperationType(DataOperationType.NO_TXN); - break; + } + compBuilder.setOperationType(DataOperationType.INSERT); + break; + case DDL_SHARED: + compBuilder.setShared(); + compBuilder.setOperationType(DataOperationType.NO_TXN); + break; - case UPDATE: - compBuilder.setSemiShared(); - compBuilder.setOperationType(DataOperationType.UPDATE); - break; - case DELETE: - compBuilder.setSemiShared(); - compBuilder.setOperationType(DataOperationType.DELETE); - break; + case UPDATE: + compBuilder.setSemiShared(); + compBuilder.setOperationType(DataOperationType.UPDATE); + break; + case DELETE: + compBuilder.setSemiShared(); + 
compBuilder.setOperationType(DataOperationType.DELETE); + break; - case DDL_NO_LOCK: - continue; // No lock required here + case DDL_NO_LOCK: + continue; // No lock required here - default: - throw new RuntimeException("Unknown write type " + - output.getWriteType().toString()); + default: + throw new RuntimeException("Unknown write type " + output.getWriteType().toString()); } - if(t != null) { + if (t != null) { compBuilder.setIsTransactional(AcidUtils.isTransactionalTable(t)); } diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java index 1696243aeb..2ebb149354 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java @@ -18,22 +18,23 @@ package org.apache.hadoop.hive.ql.metadata; -import java.util.Collections; -import java.util.Map; - import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.common.classification.InterfaceStability; import org.apache.hadoop.hive.metastore.HiveMetaHook; +import org.apache.hadoop.hive.metastore.api.LockType; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.plan.TableDesc; -import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; +import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputFormat; +import java.util.Map; + /** * HiveStorageHandler defines a pluggable interface for adding * new storage handlers to Hive. A storage handler consists of @@ -167,4 +168,8 @@ public default StorageHandlerInfo getStorageHandlerInfo(Table table) throws Meta { return null; } + + default LockType getLockType(WriteEntity writeEntity){ + return LockType.EXCLUSIVE; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java index 0d2ed545f4..071756ddfc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java @@ -166,7 +166,7 @@ public void truncateTable(String dbName, String tableName, List partName return deepCopy(table); // Original method used deepCopy(), do the same here. 
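The getLockType() default added to HiveStorageHandler above keeps the old behaviour (EXCLUSIVE) unless a handler overrides it; in DbTxnManager, SHARED_READ maps to a shared lock component, SHARED_WRITE to a semi-shared one, and EXCLUSIVE to an exclusive one. A minimal sketch of a handler relaxing the default for inserts (the class name and the INSERT-only rule are illustrative, not taken from this patch):

import org.apache.hadoop.hive.metastore.api.LockType;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;

public class AppendOnlyStorageHandler extends DefaultStorageHandler {
  @Override
  public LockType getLockType(WriteEntity writeEntity) {
    // Appends to this hypothetical store can run concurrently, so a read lock is enough;
    // every other write type falls back to the conservative exclusive lock.
    if (writeEntity.getWriteType() == WriteEntity.WriteType.INSERT) {
      return LockType.SHARED_READ;
    }
    return LockType.EXCLUSIVE;
  }
}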
} // Try underlying client - return super.getTable(MetaStoreUtils.getDefaultCatalog(conf), dbname, name); + return super.getTable(DEFAULT_CATALOG_NAME, dbname, name); } // Need to override this one too or dropTable breaks because it doesn't find the table when checks diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index ba16f842d2..879b422449 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -46,7 +46,6 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Order; -import org.apache.hadoop.hive.metastore.api.PrincipalType; import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.SkewedInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; @@ -744,12 +743,6 @@ public String getOwner() { return tTable.getOwner(); } - /** - * @return The owner type of the table. - * @see org.apache.hadoop.hive.metastore.api.Table#getOwnerType() - */ - public PrincipalType getOwnerType() { return tTable.getOwnerType(); } - /** * @return The table parameters. * @see org.apache.hadoop.hive.metastore.api.Table#getParameters() @@ -774,14 +767,6 @@ public void setOwner(String owner) { tTable.setOwner(owner); } - /** - * @param ownerType - * @see org.apache.hadoop.hive.metastore.api.Table#setOwnerType(org.apache.hadoop.hive.metastore.api.PrincipalType) - */ - public void setOwnerType(PrincipalType ownerType) { - tTable.setOwnerType(ownerType); - } - /** * @param retention * @see org.apache.hadoop.hive.metastore.api.Table#setRetention(int) diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java index e7b5af6167..c21967cb9c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java @@ -218,7 +218,6 @@ public void showTableStatus(DataOutputStream out, Hive db, HiveConf conf, MapBuilder builder = MapBuilder.create(); builder.put("tableName", tbl.getTableName()); - builder.put("ownerType", (tbl.getOwnerType() != null) ? tbl.getOwnerType().name() : "null"); builder.put("owner", tbl.getOwner()); builder.put("location", tblLoc); builder.put("inputFormat", inputFormattCls); diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java index 44687ef471..af283e693b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java @@ -425,7 +425,6 @@ private static void getStorageDescriptorInfo(StringBuilder tableInfo, private static void getTableMetaDataInformation(StringBuilder tableInfo, Table tbl, boolean isOutputPadded) { formatOutput("Database:", tbl.getDbName(), tableInfo); - formatOutput("OwnerType:", (tbl.getOwnerType() != null) ? 
tbl.getOwnerType().name() : "null", tableInfo); formatOutput("Owner:", tbl.getOwner(), tableInfo); formatOutput("CreateTime:", formatDate(tbl.getTTable().getCreateTime()), tableInfo); formatOutput("LastAccessTime:", formatDate(tbl.getTTable().getLastAccessTime()), tableInfo); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java index a137bdf94f..0b1fe74f05 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java @@ -642,7 +642,7 @@ public boolean hasNext() { } else { computeNextMapping(iterationIdx.length - 1); } - return nextMapping != null; + return nextMapping != null; } public Mapping next() { @@ -659,9 +659,7 @@ private void computeNextMapping(int level) { if (level == 0) { nextMapping = null; } else { - int tmp = columnSets[level].nextSetBit(0); - nextMapping.set(columns[level], tmp); - iterationIdx[level] = tmp + 1; + iterationIdx[level] = 0; computeNextMapping(level - 1); } } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index 37e98456f1..f0b9edaf01 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -365,8 +365,6 @@ public void analyzeInternal(ASTNode input) throws SemanticException { analyzeAlterTableAddConstraint(ast, tableName); } else if(ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_UPDATECOLUMNS) { analyzeAlterTableUpdateColumns(ast, tableName, partSpec); - } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_OWNER) { - analyzeAlterTableOwner(ast, tableName); } break; } @@ -1930,21 +1928,6 @@ private void addInputsOutputsAlterTable(String tableName, Map pa } } - private void analyzeAlterTableOwner(ASTNode ast, String tableName) throws SemanticException { - PrincipalDesc ownerPrincipal = AuthorizationParseUtils.getPrincipalDesc((ASTNode) ast.getChild(0)); - - if (ownerPrincipal.getType() == null) { - throw new SemanticException("Owner type can't be null in alter table set owner command"); - } - - if (ownerPrincipal.getName() == null) { - throw new SemanticException("Owner name can't be null in alter table set owner command"); - } - - AlterTableDesc alterTblDesc = new AlterTableDesc(tableName, ownerPrincipal); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); - } - private void analyzeAlterTableLocation(ASTNode ast, String tableName, HashMap partSpec) throws SemanticException { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java index 3a7d99d655..918cc5a8d8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java @@ -31,8 +31,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.common.ValidTxnList; -import org.apache.hadoop.hive.common.ValidTxnWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.Context; @@ -130,8 +128,6 @@ public 
void analyzeInternal(ASTNode ast) throws SemanticException { String query = ctx.getTokenRewriteStream().toString(input.getTokenStartIndex(), input.getTokenStopIndex()); LOG.info("Explain analyze (running phase) for query " + query); - conf.unset(ValidTxnList.VALID_TXNS_KEY); - conf.unset(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY); Context runCtx = null; try { runCtx = new Context(conf); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 8726974186..3712a53521 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -188,7 +188,6 @@ TOK_ALTERTABLE_COMPACT; TOK_ALTERTABLE_DROPCONSTRAINT; TOK_ALTERTABLE_ADDCONSTRAINT; TOK_ALTERTABLE_UPDATECOLUMNS; -TOK_ALTERTABLE_OWNER; TOK_MSCK; TOK_SHOWDATABASES; TOK_SHOWTABLES; @@ -1137,7 +1136,6 @@ alterTableStatementSuffix | alterStatementSuffixDropConstraint | alterStatementSuffixAddConstraint | partitionSpec? alterTblPartitionStatementSuffix -> alterTblPartitionStatementSuffix partitionSpec? - | alterStatementSuffixSetOwner ; alterTblPartitionStatementSuffix @@ -1483,12 +1481,6 @@ alterStatementSuffixCompact -> ^(TOK_ALTERTABLE_COMPACT $compactType blocking? tableProperties?) ; -alterStatementSuffixSetOwner -@init { pushMsg("alter table set owner", state); } -@after { popMsg(state); } - : KW_SET KW_OWNER principalName - -> ^(TOK_ALTERTABLE_OWNER principalName) - ; fileFormat @init { pushMsg("file format specification", state); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index 682b641b21..e597872e86 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -891,14 +891,14 @@ private static void createRegularImportTasks( } } - private static Table createNewTableMetadataObject(ImportTableDesc tblDesc) + private static Table createNewTableMetadataObject(ImportTableDesc tblDesk) throws SemanticException { - Table newTable = new Table(tblDesc.getDatabaseName(), tblDesc.getTableName()); + Table newTable = new Table(tblDesk.getDatabaseName(), tblDesk.getTableName()); //so that we know the type of table we are creating: acid/MM to match what was exported - newTable.setParameters(tblDesc.getTblProps()); - if(tblDesc.isExternal() && AcidUtils.isTransactionalTable(newTable)) { + newTable.setParameters(tblDesk.getTblProps()); + if(tblDesk.isExternal() && AcidUtils.isTransactionalTable(newTable)) { throw new SemanticException("External tables may not be transactional: " + - Warehouse.getQualifiedName(tblDesc.getDatabaseName(), tblDesc.getTableName())); + Warehouse.getQualifiedName(tblDesk.getDatabaseName(), tblDesk.getTableName())); } return newTable; } @@ -1019,36 +1019,14 @@ private static void createReplImportTasks( x.getTasks().add(t); } } else { - // If table of current event has partition flag different from existing table, it means, some - // of the previous events in same batch have drop and create table events with same same but - // different partition flag. In this case, should go with current event's table type and so - // create the dummy table object for adding repl tasks. 
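Editor's note (illustration only, not part of the patch): createNewTableMetadataObject above rebuilds a bare Table from the ImportTableDesc and copies the exported table properties onto it so that AcidUtils.isTransactionalTable can classify the incoming table before any task is scheduled; an external table whose exported properties mark it transactional is rejected. A small hedged sketch of that classification step follows (database and table names are hypothetical, and "transactional" is the table property key the ACID check is understood to read).

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.metadata.Table;

public class TransactionalCheckSketch {
  public static void main(String[] args) {
    // Bare metadata object, built the same way as in createNewTableMetadataObject.
    Table newTable = new Table("repl_db", "example_tbl");

    // Carry over the properties that came with the export dump.
    Map<String, String> exportedProps = new HashMap<>();
    exportedProps.put("transactional", "true");
    newTable.setParameters(exportedProps);

    // Prints true, so an EXTERNAL table with these properties would trigger
    // the "External tables may not be transactional" SemanticException above.
    System.out.println(AcidUtils.isTransactionalTable(newTable));
  }
}

End of editor's note.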
- boolean isOldTableValid = true; - if (table.isPartitioned() != isPartitioned(tblDesc)) { - table = createNewTableMetadataObject(tblDesc); - isOldTableValid = false; - } - // Table existed, and is okay to replicate into, not dropping and re-creating. - if (isPartitioned(tblDesc)) { + if (table.isPartitioned()) { x.getLOG().debug("table partitioned"); for (AddPartitionDesc addPartitionDesc : partitionDescs) { addPartitionDesc.setReplicationSpec(replicationSpec); Map partSpec = addPartitionDesc.getPartition(0).getPartSpec(); org.apache.hadoop.hive.ql.metadata.Partition ptn = null; - if (isOldTableValid) { - // If existing table is valid but the partition spec is different, then ignore partition - // validation and create new partition. - try { - ptn = x.getHive().getPartition(table, partSpec, false); - } catch (HiveException ex) { - ptn = null; - table = createNewTableMetadataObject(tblDesc); - isOldTableValid = false; - } - } - - if (ptn == null) { + if ((ptn = x.getHive().getPartition(table, partSpec, false)) == null) { if (!replicationSpec.isMetadataOnly()){ x.getTasks().add(addSinglePartition( fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, writeId, stmtId)); @@ -1093,7 +1071,7 @@ private static void createReplImportTasks( x.getLOG().debug("table non-partitioned"); if (!replicationSpec.isMetadataOnly()) { // repl-imports are replace-into unless the event is insert-into - loadTable(fromURI, table, replicationSpec.isReplace(), new Path(tblDesc.getLocation()), + loadTable(fromURI, table, replicationSpec.isReplace(), table.getDataLocation(), replicationSpec, x, writeId, stmtId); } else { x.getTasks().add(alterTableTask(tblDesc, x, replicationSpec)); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index ff952b6950..1e7b7c74fb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -20,6 +20,7 @@ import com.google.common.base.Splitter; import com.google.common.base.Strings; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Sets; import com.google.common.math.IntMath; @@ -7296,6 +7297,10 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) boolean overwrite = !qb.getParseInfo().isInsertIntoTable( String.format("%s.%s", dest_tab.getDbName(), dest_tab.getTableName())); createPreInsertDesc(dest_tab, overwrite); + + ltd = new LoadTableDesc(queryTmpdir, table_desc, partSpec == null ? ImmutableMap.of() : partSpec); + ltd.setInsertOverwrite(overwrite); + ltd.setLoadFileType(overwrite ? LoadFileType.REPLACE_ALL : LoadFileType.KEEP_EXISTING); } if (dest_tab.isMaterializedView()) { @@ -14394,15 +14399,13 @@ private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo, } private WriteEntity.WriteType determineWriteType(LoadTableDesc ltd, boolean isNonNativeTable, String dest) { - // Don't know the characteristics of non-native tables, - // and don't have a rational way to guess, so assume the most - // conservative case. - if (isNonNativeTable) { + + if (ltd == null) { return WriteEntity.WriteType.INSERT_OVERWRITE; - } else { - return ((ltd.getLoadFileType() == LoadFileType.REPLACE_ALL || ltd.isInsertOverwrite()) - ? WriteEntity.WriteType.INSERT_OVERWRITE : getWriteType(dest)); } + return ((ltd.getLoadFileType() == LoadFileType.REPLACE_ALL || ltd + .isInsertOverwrite()) ? 
WriteEntity.WriteType.INSERT_OVERWRITE : getWriteType(dest)); + } private WriteEntity.WriteType getWriteType(String dest) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java index 088b5cf8cb..70295da960 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java @@ -94,7 +94,6 @@ commandType.put(HiveParser.TOK_ALTERVIEW_DROPPROPERTIES, HiveOperation.ALTERVIEW_PROPERTIES); commandType.put(HiveParser.TOK_ALTERVIEW_ADDPARTS, HiveOperation.ALTERTABLE_ADDPARTS); commandType.put(HiveParser.TOK_ALTERVIEW_DROPPARTS, HiveOperation.ALTERTABLE_DROPPARTS); - commandType.put(HiveParser.TOK_ALTERTABLE_OWNER, HiveOperation.ALTERTABLE_OWNER); commandType.put(HiveParser.TOK_ALTERVIEW_RENAME, HiveOperation.ALTERVIEW_RENAME); commandType.put(HiveParser.TOK_ALTERVIEW, HiveOperation.ALTERVIEW_AS); commandType.put(HiveParser.TOK_ALTER_MATERIALIZED_VIEW_REWRITE, @@ -245,7 +244,6 @@ private static BaseSemanticAnalyzer getInternal(QueryState queryState, ASTNode t case HiveParser.TOK_ALTERTABLE_DROPPROPERTIES: case HiveParser.TOK_ALTERTABLE_EXCHANGEPARTITION: case HiveParser.TOK_ALTERTABLE_SKEWED: - case HiveParser.TOK_ALTERTABLE_OWNER: case HiveParser.TOK_ALTERTABLE_DROPCONSTRAINT: case HiveParser.TOK_ALTERTABLE_ADDCONSTRAINT: case HiveParser.TOK_ALTERTABLE_UPDATECOLUMNS: diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FileOperations.java ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FileOperations.java index b3e76b6259..b61a945d94 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FileOperations.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FileOperations.java @@ -133,16 +133,27 @@ private void copyMmPath() throws LoginException, IOException { return validPaths; } + /** * This needs the root data directory to which the data needs to be exported to. * The data export here is a list of files either in table/partition that are written to the _files - * in the exportRootDataDir provided. In case of MM/ACID tables, we expect this pathlist to be - * already passed as valid paths by caller based on ValidWriteIdList. So, mmCtx is ignored here. + * in the exportRootDataDir provided. 
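Editor's note (illustration only, not part of the patch): the javadoc above describes exportFilesAsList as writing a manifest of data files (the _files list) under the export root, and the restored branch below re-reads the table's ValidWriteIdList so that only valid MM/ACID directories are listed. A rough sketch of the general shape using plain Hadoop FileSystem calls follows; the paths are hypothetical, the per-write-id filtering that getMmValidPaths performs is omitted, and the real writeFilesList may record more per line than the bare file URI written here.

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExportFileListSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path dataDir = new Path("/warehouse/example.db/t");    // hypothetical table directory
    Path manifest = new Path("/tmp/export_root/_files");   // hypothetical export root

    FileSystem fs = dataDir.getFileSystem(conf);
    try (BufferedWriter writer = new BufferedWriter(
        new OutputStreamWriter(fs.create(manifest, true), StandardCharsets.UTF_8))) {
      // One line per data file; only the listing-and-writing step is shown.
      for (FileStatus status : fs.listStatus(dataDir)) {
        if (status.isFile()) {
          writer.write(status.getPath().toUri().toString());
          writer.newLine();
        }
      }
    }
  }
}

End of editor's note.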
*/ private void exportFilesAsList() throws SemanticException, IOException { try (BufferedWriter writer = writer()) { - for (Path dataPath : dataPathList) { - writeFilesList(listFilesInDir(dataPath), writer, AcidUtils.getAcidSubDir(dataPath)); + if (mmCtx != null) { + assert dataPathList.size() == 1; + Path dataPath = dataPathList.get(0); + ValidWriteIdList ids = AcidUtils.getTableValidWriteIdList( + hiveConf, mmCtx.getFqTableName()); + List validPaths = getMmValidPaths(ids, dataPath); + for (Path mmPath : validPaths) { + writeFilesList(listFilesInDir(mmPath), writer, AcidUtils.getAcidSubDir(dataPath)); + } + } else { + for (Path dataPath : dataPathList) { + writeFilesList(listFilesInDir(dataPath), writer, AcidUtils.getAcidSubDir(dataPath)); + } } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java index 0b04c0ce85..a767796a94 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java @@ -66,7 +66,7 @@ ALTERSKEWEDLOCATION("alter skew location"), ALTERBUCKETNUM("alter bucket number"), ALTERPARTITION("alter partition"), COMPACT("compact"), TRUNCATE("truncate"), MERGEFILES("merge files"), DROPCONSTRAINT("drop constraint"), ADDCONSTRAINT("add constraint"), - UPDATECOLUMNS("update columns"), OWNER("set owner"); + UPDATECOLUMNS("update columns"); ; private final String name; @@ -138,7 +138,6 @@ List checkConstraintsCols; ReplicationSpec replicationSpec; private Long writeId = null; - PrincipalDesc ownerPrincipal; public AlterTableDesc() { } @@ -368,24 +367,6 @@ public AlterTableDesc(String tableName, List primaryKeyCols, op = AlterTableTypes.ADDCONSTRAINT; } - public AlterTableDesc(String tableName, PrincipalDesc ownerPrincipal) { - op = AlterTableTypes.OWNER; - this.oldName = tableName; - this.ownerPrincipal = ownerPrincipal; - } - - /** - * @param ownerPrincipal the owner principal of the table - */ - public void setOwnerPrincipal(PrincipalDesc ownerPrincipal) { - this.ownerPrincipal = ownerPrincipal; - } - - @Explain(displayName="owner") - public PrincipalDesc getOwnerPrincipal() { - return this.ownerPrincipal; - } - @Explain(displayName = "new columns", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public List getNewColsString() { return Utilities.getFieldSchemaString(getNewCols()); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java index ba3330bf58..cd4c206a89 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java @@ -51,7 +51,6 @@ ALTERTABLE_ARCHIVE("ALTERTABLE_ARCHIVE", new Privilege[]{Privilege.ALTER_DATA}, null), ALTERTABLE_UNARCHIVE("ALTERTABLE_UNARCHIVE", new Privilege[]{Privilege.ALTER_DATA}, null), ALTERTABLE_PROPERTIES("ALTERTABLE_PROPERTIES", new Privilege[]{Privilege.ALTER_METADATA}, null), - ALTERTABLE_OWNER("ALTERTABLE_OWNER", null, null), ALTERTABLE_SERIALIZER("ALTERTABLE_SERIALIZER", new Privilege[]{Privilege.ALTER_METADATA}, null), ALTERPARTITION_SERIALIZER("ALTERPARTITION_SERIALIZER", new Privilege[]{Privilege.ALTER_METADATA}, null), ALTERTABLE_SERDEPROPERTIES("ALTERTABLE_SERDEPROPERTIES", new Privilege[]{Privilege.ALTER_METADATA}, null), diff --git ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java 
ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java index 09c15f00f0..be5c062ccf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java +++ ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java @@ -53,7 +53,6 @@ ALTERTABLE_ARCHIVE, ALTERTABLE_UNARCHIVE, ALTERTABLE_PROPERTIES, - ALTERTABLE_OWNER, ALTERTABLE_SERIALIZER, ALTERTABLE_PARTCOLTYPE, ALTERTABLE_DROPCONSTRAINT, diff --git ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java index 6137983efd..771dfb7f4c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java +++ ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java @@ -195,8 +195,6 @@ public HivePrivilegeObjectType getObjectType() { op2Priv.put(HiveOperationType.ALTERTABLE_UNARCHIVE, PrivRequirement.newIOPrivRequirement (OWNER_PRIV_AR, OWNER_PRIV_AR)); op2Priv.put(HiveOperationType.ALTERTABLE_PROPERTIES, PrivRequirement.newIOPrivRequirement -(OWNER_PRIV_AR, OWNER_PRIV_AR)); - op2Priv.put(HiveOperationType.ALTERTABLE_OWNER, PrivRequirement.newIOPrivRequirement (OWNER_PRIV_AR, OWNER_PRIV_AR)); op2Priv.put(HiveOperationType.ALTERTABLE_SERIALIZER, PrivRequirement.newIOPrivRequirement (OWNER_PRIV_AR, OWNER_PRIV_AR)); diff --git ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java index 89129f99fe..6bb756cc08 100644 --- ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java +++ ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java @@ -135,7 +135,7 @@ /** * current configuration. */ - private HiveConf sessionConf; + private final HiveConf sessionConf; /** * silent mode. 
@@ -308,9 +308,6 @@ public HiveConf getConf() { return sessionConf; } - public void setConf(HiveConf conf) { - this.sessionConf = conf; - } public File getTmpOutputFile() { return tmpOutputFile; diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 952b4abb2a..cef87f5957 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -59,7 +59,6 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ColumnStatsList; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; -import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; import org.apache.hadoop.hive.ql.plan.ColStatistics; import org.apache.hadoop.hive.ql.plan.ColStatistics.Range; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -1055,8 +1054,8 @@ else if(colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)){ cs.setAvgColLen(getAvgColLenOf(conf,cinfo.getObjectInspector(), cinfo.getTypeName())); } else if (colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)) { cs.setCountDistint(2); - cs.setNumTrues(Math.max(1, numRows/2)); - cs.setNumFalses(Math.max(1, numRows/2)); + cs.setNumTrues(Math.max(1, (long)numRows/2)); + cs.setNumFalses(Math.max(1, (long)numRows/2)); cs.setAvgColLen(JavaDataModel.get().primitive1()); } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME) || colTypeLowerCase.equals(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME)) { @@ -1118,12 +1117,6 @@ else if(colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)){ // Retrieve stats from metastore String dbName = table.getDbName(); String tabName = table.getTableName(); - if (SemanticAnalyzer.DUMMY_DATABASE.equals(dbName) && - SemanticAnalyzer.DUMMY_TABLE.equals(tabName)) { - // insert into values gets written into insert from select dummy_table - // This table is dummy and has no stats - return null; - } List stats = null; try { List colStat = Hive.get().getTableColumnStatistics( diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEnforceConstraint.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEnforceNotNullConstraint.java similarity index 92% rename from ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEnforceConstraint.java rename to ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEnforceNotNullConstraint.java index aa0059b1cf..6c8c6fdae3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEnforceConstraint.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEnforceNotNullConstraint.java @@ -33,9 +33,9 @@ * */ @Description(name = "enforce_constraint", - value = "_FUNC_(x) - Internal UDF to enforce CHECK and NOT NULL constraint", + value = "_FUNC_(x) - Internal UDF to enforce NOT NULL constraint", extended = "For internal use only") -public class GenericUDFEnforceConstraint extends GenericUDF { +public class GenericUDFEnforceNotNullConstraint extends GenericUDF { private final BooleanWritable resultBool = new BooleanWritable(); private transient BooleanObjectInspector boi; @@ -59,7 +59,7 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { if(!result) { throw new DataConstraintViolationError( - "Either CHECK or NOT NULL constraint violated!"); + "NOT NULL constraint violated!"); } resultBool.set(true); return resultBool; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java 
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java index 142dd1b115..90ff765ad7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java @@ -41,10 +41,10 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarLongScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarDoubleScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprIntervalDayTimeColumnColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprIntervalDayTimeColumnScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprIntervalDayTimeScalarColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprIntervalDayTimeScalarScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprIntervalDayTimeColumnColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprIntervalDayTimeColumnScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprIntervalDayTimeScalarColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprIntervalDayTimeScalarScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringGroupColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringScalar; @@ -52,10 +52,10 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnVarCharScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringGroupColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprCharScalarStringGroupColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampColumnColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampColumnScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampScalarColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampScalarScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampColumnColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampColumnScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampScalarColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampScalarScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprVarCharScalarStringGroupColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarCharScalar; diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java index 2663fec9e3..92bcefe5ee 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java @@ -1,21 +1,3 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - package org.apache.hadoop.hive.ql; import org.apache.hadoop.hive.conf.HiveConf; diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java index 861d9dbb2e..4f1d38403d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java @@ -539,8 +539,6 @@ public void testMMExportAborted() throws Exception { TestTxnCommands2.stringifyValues(data), rs); } - - @Ignore("HIVE-19509: Disable tests that are failing continuously") @Test public void testUpgrade() throws Exception { int[][] data = {{1,2}, {3, 4}, {5, 6}}; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java index 791ac82dd7..551bb9ed72 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java @@ -47,10 +47,10 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnVarCharScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringGroupColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampColumnColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampColumnScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampScalarColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampScalarScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampColumnColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampColumnScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampScalarColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampScalarScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprVarCharScalarStringGroupColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNotNull; import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNull; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomBatchSource.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomBatchSource.java deleted file mode 100644 index 8de247c034..0000000000 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomBatchSource.java +++ /dev/null @@ -1,311 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector; - -import java.util.ArrayList; -import java.util.BitSet; -import java.util.List; -import java.util.Random; - -import org.apache.hadoop.hive.ql.metadata.HiveException; - - -/** - * Generate random batch source from a random Object[] row source (VectorRandomRowSource). - */ -public class VectorRandomBatchSource { - - // Divide up rows array into different sized batches. - // Modify the rows array for isRepeating / NULL patterns. - // Provide iterator that will fill up a VRB with the divided up rows. - - private final VectorRandomRowSource vectorRandomRowSource; - - private final Object[][] randomRows; - - private final int rowCount; - private final int columnCount; - - private final VectorBatchPatterns vectorBatchPatterns; - - private VectorAssignRow vectorAssignRow; - - private int nextRowIndex; - private int batchCount; - - private VectorRandomBatchSource( - VectorRandomRowSource vectorRandomRowSource, - Object[][] randomRows, - VectorBatchPatterns vectorBatchPatterns, - VectorAssignRow vectorAssignRow) { - this.vectorRandomRowSource = vectorRandomRowSource; - this.randomRows = randomRows; - rowCount = randomRows.length; - Object[] firstRow = randomRows[0]; - columnCount = firstRow.length; - this.vectorBatchPatterns = vectorBatchPatterns; - this.vectorAssignRow = vectorAssignRow; - } - - public static class VectorRandomBatchParameters { - } - - private static class VectorBatchPatterns { - - private final List vectorBatchPatternList; - - VectorBatchPatterns(List vectorBatchPatternList) { - this.vectorBatchPatternList = vectorBatchPatternList; - } - - List getTectorBatchPatternList() { - return vectorBatchPatternList; - } - } - - private static class VectorBatchPattern { - - final int batchSize; - final BitSet bitSet; - - private VectorBatchPattern(int batchSize, BitSet bitSet) { - this.batchSize = batchSize; - this.bitSet = bitSet; - } - - public static VectorBatchPattern createRegularBatch(int batchSize) { - return new VectorBatchPattern(batchSize, null); - } - - public static VectorBatchPattern createRepeatedBatch(int batchSize, BitSet bitSet) { - return new VectorBatchPattern(batchSize, bitSet); - } - - public int getBatchSize() { - return batchSize; - } - - public BitSet getBitSet() { - return bitSet; - } - - public String toString() { - String batchSizeString = "batchSize " + Integer.toString(batchSize); - if (bitSet == null) { - return batchSizeString; - } - long bitMask = bitSet.toLongArray()[0]; - return batchSizeString + " repeating 0x" + Long.toHexString(bitMask); - } - } - - private static VectorBatchPatterns chooseBatchPatterns( - Random random, - VectorRandomRowSource vectorRandomRowSource, - Object[][] randomRows) { - - List vectorBatchPatternList = new ArrayList(); - final int rowCount = randomRows.length; - int rowIndex = 0; - - if (rowCount > 0) { - - final int columnCount = randomRows[0].length; - - // Choose first up to a full batch. 
- final int regularBatchSize = Math.min(rowCount - rowIndex, VectorizedRowBatch.DEFAULT_SIZE); - vectorBatchPatternList.add(VectorBatchPattern.createRegularBatch(regularBatchSize)); - rowIndex += regularBatchSize; - - // Have a non-NULL value on hand. - Object[] nonNullRow = new Object[columnCount]; - for (int c = 0; c < columnCount; c++) { - for (int r = 0; r < rowCount; r++) { - Object object = randomRows[r][c]; - if (object != null) { - nonNullRow[c] = object; - break; - } - } - } - - int columnPermutationLimit = Math.min(columnCount, Long.SIZE); - - // Repeated NULL permutations. - long columnPermutation = 1; - while (true) { - if (columnPermutation > columnPermutationLimit) { - break; - } - final int maximumRowCount = Math.min(rowCount - rowIndex, VectorizedRowBatch.DEFAULT_SIZE); - if (maximumRowCount == 0) { - break; - } - int randomRowCount = 1 + random.nextInt(maximumRowCount); - final int rowLimit = rowIndex + randomRowCount; - - BitSet bitSet = BitSet.valueOf(new long[]{columnPermutation}); - - for (int columnNum = bitSet.nextSetBit(0); - columnNum >= 0; - columnNum = bitSet.nextSetBit(columnNum + 1)) { - - // Repeated NULL fill down column. - for (int r = rowIndex; r < rowLimit; r++) { - randomRows[r][columnNum] = null; - } - } - vectorBatchPatternList.add(VectorBatchPattern.createRepeatedBatch(randomRowCount, bitSet)); - columnPermutation++; - rowIndex = rowLimit; - } - - // Repeated non-NULL permutations. - columnPermutation = 1; - while (true) { - if (columnPermutation > columnPermutationLimit) { - break; - } - final int maximumRowCount = Math.min(rowCount - rowIndex, VectorizedRowBatch.DEFAULT_SIZE); - if (maximumRowCount == 0) { - break; - } - int randomRowCount = 1 + random.nextInt(maximumRowCount); - final int rowLimit = rowIndex + randomRowCount; - - BitSet bitSet = BitSet.valueOf(new long[]{columnPermutation}); - - for (int columnNum = bitSet.nextSetBit(0); - columnNum >= 0; - columnNum = bitSet.nextSetBit(columnNum + 1)) { - - // Repeated non-NULL fill down column. - Object repeatedObject = randomRows[rowIndex][columnNum]; - if (repeatedObject == null) { - repeatedObject = nonNullRow[columnNum]; - } - for (int r = rowIndex; r < rowLimit; r++) { - randomRows[r][columnNum] = repeatedObject; - } - } - vectorBatchPatternList.add(VectorBatchPattern.createRepeatedBatch(randomRowCount, bitSet)); - columnPermutation++; - rowIndex = rowLimit; - } - - // Remaining batches. 
- while (true) { - final int maximumRowCount = Math.min(rowCount - rowIndex, VectorizedRowBatch.DEFAULT_SIZE); - if (maximumRowCount == 0) { - break; - } - int randomRowCount = 1 + random.nextInt(maximumRowCount); - vectorBatchPatternList.add(VectorBatchPattern.createRegularBatch(randomRowCount)); - rowIndex += randomRowCount; - } - } - - // System.out.println("*DEBUG* vectorBatchPatternList" + vectorBatchPatternList.toString()); - - return new VectorBatchPatterns(vectorBatchPatternList); - } - - public static VectorRandomBatchSource createInterestingBatches( - Random random, - VectorRandomRowSource vectorRandomRowSource, - Object[][] randomRows, - VectorRandomBatchParameters vectorRandomBatchParameters) - throws HiveException { - - VectorAssignRow vectorAssignRow = new VectorAssignRow(); - vectorAssignRow.init(vectorRandomRowSource.typeNames()); - - VectorBatchPatterns vectorBatchPatterns = - chooseBatchPatterns(random, vectorRandomRowSource, randomRows); - - return new VectorRandomBatchSource( - vectorRandomRowSource, randomRows, vectorBatchPatterns, vectorAssignRow); - } - - public VectorRandomRowSource getRowSource() { - return vectorRandomRowSource; - } - - public Object[][] getRandomRows() { - return randomRows; - } - - public void resetBatchIteration() { - nextRowIndex = 0; - batchCount = 0; - } - - public int getBatchCount() { - return batchCount; - } - - public int getRowCount() { - return rowCount; - } - - /* - * Patterns of isRepeating columns - * For boolean: tri-state: null, 0, 1 - * For others: null, some-value - * noNulls: sometimes false and there are no NULLs. - * Random selectedInUse, too. - */ - public boolean fillNextBatch(VectorizedRowBatch batch) { - if (nextRowIndex >= rowCount) { - return false; - } - - VectorBatchPattern vectorBatchPattern = - vectorBatchPatterns.getTectorBatchPatternList().get(batchCount); - final int batchSize = vectorBatchPattern.getBatchSize(); - - for (int c = 0; c < columnCount; c++) { - batch.cols[c].reset(); - } - - BitSet bitSet = vectorBatchPattern.getBitSet(); - if (bitSet != null) { - for (int columnNum = bitSet.nextSetBit(0); - columnNum >= 0; - columnNum = bitSet.nextSetBit(columnNum + 1)) { - batch.cols[columnNum].isRepeating = true; - } - } - - int rowIndex = nextRowIndex; - for (int batchIndex = 0; batchIndex < batchSize; batchIndex++) { - for (int c = 0; c < columnCount; c++) { - if (batch.cols[c].isRepeating && batchIndex > 0) { - continue; - } - vectorAssignRow.assignRowColumn(batch, batchIndex, c, randomRows[rowIndex][c]); - } - rowIndex++; - } - batch.size = batchSize; - batchCount++; - nextRowIndex += batchSize; - return true; - } -} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java index fa5c775a98..3f993284f1 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java @@ -24,9 +24,7 @@ import java.util.HashSet; import java.util.List; import java.util.Random; -import java.util.Set; -import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -34,7 +32,6 @@ import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.RandomTypeUtil; import org.apache.hadoop.hive.serde2.io.HiveCharWritable; 
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; @@ -74,7 +71,6 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; @@ -100,8 +96,6 @@ private TypeInfo[] typeInfos; - private DataTypePhysicalVariation[] dataTypePhysicalVariations; - private List objectInspectorList; // Primitive. @@ -133,10 +127,6 @@ return typeInfos; } - public DataTypePhysicalVariation[] dataTypePhysicalVariations() { - return dataTypePhysicalVariations; - } - public PrimitiveCategory[] primitiveCategories() { return primitiveCategories; } @@ -173,22 +163,7 @@ public void init(Random r, SupportedTypes supportedTypes, int maxComplexDepth) { public void init(Random r, SupportedTypes supportedTypes, int maxComplexDepth, boolean allowNull) { this.r = r; this.allowNull = allowNull; - chooseSchema(supportedTypes, null, null, null, maxComplexDepth); - } - - public void init(Random r, Set allowedTypeNameSet, int maxComplexDepth, boolean allowNull) { - this.r = r; - this.allowNull = allowNull; - chooseSchema(SupportedTypes.ALL, allowedTypeNameSet, null, null, maxComplexDepth); - } - - public void initExplicitSchema(Random r, List explicitTypeNameList, int maxComplexDepth, - boolean allowNull, List explicitDataTypePhysicalVariationList) { - this.r = r; - this.allowNull = allowNull; - chooseSchema( - SupportedTypes.ALL, null, explicitTypeNameList, explicitDataTypePhysicalVariationList, - maxComplexDepth); + chooseSchema(supportedTypes, maxComplexDepth); } /* @@ -205,7 +180,7 @@ public void initExplicitSchema(Random r, List explicitTypeNameList, int "float", "double", "string", - "char", +// "char", "varchar", "binary", "date", @@ -222,30 +197,27 @@ public void initExplicitSchema(Random r, List explicitTypeNameList, int "map" }; - private String getRandomTypeName(SupportedTypes supportedTypes, Set allowedTypeNameSet) { + private String getRandomTypeName(SupportedTypes supportedTypes) { String typeName = null; - do { - if (r.nextInt(10 ) != 0) { + if (r.nextInt(10 ) != 0) { + typeName = possibleHivePrimitiveTypeNames[r.nextInt(possibleHivePrimitiveTypeNames.length)]; + } else { + switch (supportedTypes) { + case PRIMITIVES: typeName = possibleHivePrimitiveTypeNames[r.nextInt(possibleHivePrimitiveTypeNames.length)]; - } else { - switch (supportedTypes) { - case PRIMITIVES: - typeName = possibleHivePrimitiveTypeNames[r.nextInt(possibleHivePrimitiveTypeNames.length)]; - break; - case ALL_EXCEPT_MAP: - typeName = possibleHiveComplexTypeNames[r.nextInt(possibleHiveComplexTypeNames.length - 1)]; - break; - case ALL: - typeName = possibleHiveComplexTypeNames[r.nextInt(possibleHiveComplexTypeNames.length)]; - break; - } + break; + case ALL_EXCEPT_MAP: + typeName = possibleHiveComplexTypeNames[r.nextInt(possibleHiveComplexTypeNames.length - 1)]; + break; + case ALL: + typeName = possibleHiveComplexTypeNames[r.nextInt(possibleHiveComplexTypeNames.length)]; + break; } - } while (allowedTypeNameSet != null && !allowedTypeNameSet.contains(typeName)); + 
} return typeName; } - private String getDecoratedTypeName(String typeName, SupportedTypes supportedTypes, - Set allowedTypeNameSet, int depth, int maxDepth) { + private String getDecoratedTypeName(String typeName, SupportedTypes supportedTypes, int depth, int maxDepth) { depth++; if (depth < maxDepth) { supportedTypes = SupportedTypes.PRIMITIVES; @@ -257,32 +229,23 @@ private String getDecoratedTypeName(String typeName, SupportedTypes supportedTyp final int maxLength = 1 + r.nextInt(100); typeName = String.format("varchar(%d)", maxLength); } else if (typeName.equals("decimal")) { - typeName = - String.format( - "decimal(%d,%d)", - HiveDecimal.SYSTEM_DEFAULT_PRECISION, - HiveDecimal.SYSTEM_DEFAULT_SCALE); + typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE); } else if (typeName.equals("array")) { - String elementTypeName = getRandomTypeName(supportedTypes, allowedTypeNameSet); - elementTypeName = - getDecoratedTypeName(elementTypeName, supportedTypes, allowedTypeNameSet, depth, maxDepth); + String elementTypeName = getRandomTypeName(supportedTypes); + elementTypeName = getDecoratedTypeName(elementTypeName, supportedTypes, depth, maxDepth); typeName = String.format("array<%s>", elementTypeName); } else if (typeName.equals("map")) { - String keyTypeName = getRandomTypeName(SupportedTypes.PRIMITIVES, allowedTypeNameSet); - keyTypeName = - getDecoratedTypeName(keyTypeName, supportedTypes, allowedTypeNameSet, depth, maxDepth); - String valueTypeName = getRandomTypeName(supportedTypes, allowedTypeNameSet); - valueTypeName = - getDecoratedTypeName(valueTypeName, supportedTypes, allowedTypeNameSet, depth, maxDepth); + String keyTypeName = getRandomTypeName(SupportedTypes.PRIMITIVES); + keyTypeName = getDecoratedTypeName(keyTypeName, supportedTypes, depth, maxDepth); + String valueTypeName = getRandomTypeName(supportedTypes); + valueTypeName = getDecoratedTypeName(valueTypeName, supportedTypes, depth, maxDepth); typeName = String.format("map<%s,%s>", keyTypeName, valueTypeName); } else if (typeName.equals("struct")) { final int fieldCount = 1 + r.nextInt(10); final StringBuilder sb = new StringBuilder(); for (int i = 0; i < fieldCount; i++) { - String fieldTypeName = getRandomTypeName(supportedTypes, allowedTypeNameSet); - fieldTypeName = - getDecoratedTypeName( - fieldTypeName, supportedTypes, allowedTypeNameSet, depth, maxDepth); + String fieldTypeName = getRandomTypeName(supportedTypes); + fieldTypeName = getDecoratedTypeName(fieldTypeName, supportedTypes, depth, maxDepth); if (i > 0) { sb.append(","); } @@ -297,10 +260,8 @@ private String getDecoratedTypeName(String typeName, SupportedTypes supportedTyp final int fieldCount = 1 + r.nextInt(10); final StringBuilder sb = new StringBuilder(); for (int i = 0; i < fieldCount; i++) { - String fieldTypeName = getRandomTypeName(supportedTypes, allowedTypeNameSet); - fieldTypeName = - getDecoratedTypeName( - fieldTypeName, supportedTypes, allowedTypeNameSet, depth, maxDepth); + String fieldTypeName = getRandomTypeName(supportedTypes); + fieldTypeName = getDecoratedTypeName(fieldTypeName, supportedTypes, depth, maxDepth); if (i > 0) { sb.append(","); } @@ -312,29 +273,14 @@ private String getDecoratedTypeName(String typeName, SupportedTypes supportedTyp } private ObjectInspector getObjectInspector(TypeInfo typeInfo) { - return getObjectInspector(typeInfo, DataTypePhysicalVariation.NONE); - } - - private ObjectInspector getObjectInspector(TypeInfo typeInfo, - DataTypePhysicalVariation 
dataTypePhysicalVariation) { - final ObjectInspector objectInspector; switch (typeInfo.getCategory()) { case PRIMITIVE: { - final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; - if (primitiveTypeInfo instanceof DecimalTypeInfo && - dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { - objectInspector = - PrimitiveObjectInspectorFactory. - getPrimitiveWritableObjectInspector( - TypeInfoFactory.longTypeInfo); - } else { - objectInspector = - PrimitiveObjectInspectorFactory. - getPrimitiveWritableObjectInspector( - primitiveTypeInfo); - } + final PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) typeInfo; + objectInspector = + PrimitiveObjectInspectorFactory. + getPrimitiveWritableObjectInspector(primitiveType); } break; case MAP: @@ -395,50 +341,35 @@ private ObjectInspector getObjectInspector(TypeInfo typeInfo, return objectInspector; } - private void chooseSchema(SupportedTypes supportedTypes, Set allowedTypeNameSet, - List explicitTypeNameList, - List explicitDataTypePhysicalVariationList, - int maxComplexDepth) { - HashSet hashSet = null; + private void chooseSchema(SupportedTypes supportedTypes, int maxComplexDepth) { + HashSet hashSet = null; final boolean allTypes; - final boolean onlyOne; - if (explicitTypeNameList != null) { - columnCount = explicitTypeNameList.size(); - allTypes = false; - onlyOne = false; - } else if (allowedTypeNameSet != null) { - columnCount = 1 + r.nextInt(20); + final boolean onlyOne = (r.nextInt(100) == 7); + if (onlyOne) { + columnCount = 1; allTypes = false; - onlyOne = false; } else { - onlyOne = (r.nextInt(100) == 7); - if (onlyOne) { - columnCount = 1; - allTypes = false; - } else { - allTypes = r.nextBoolean(); - if (allTypes) { - switch (supportedTypes) { - case ALL: - columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length; - break; - case ALL_EXCEPT_MAP: - columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length - 1; - break; - case PRIMITIVES: - columnCount = possibleHivePrimitiveTypeNames.length; - break; - } - hashSet = new HashSet(); - } else { - columnCount = 1 + r.nextInt(20); + allTypes = r.nextBoolean(); + if (allTypes) { + switch (supportedTypes) { + case ALL: + columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length; + break; + case ALL_EXCEPT_MAP: + columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length - 1; + break; + case PRIMITIVES: + columnCount = possibleHivePrimitiveTypeNames.length; + break; } + hashSet = new HashSet(); + } else { + columnCount = 1 + r.nextInt(20); } } typeNames = new ArrayList(columnCount); categories = new Category[columnCount]; typeInfos = new TypeInfo[columnCount]; - dataTypePhysicalVariations = new DataTypePhysicalVariation[columnCount]; objectInspectorList = new ArrayList(columnCount); primitiveCategories = new PrimitiveCategory[columnCount]; @@ -448,13 +379,9 @@ private void chooseSchema(SupportedTypes supportedTypes, Set allowedType for (int c = 0; c < columnCount; c++) { columnNames.add(String.format("col%d", c)); final String typeName; - DataTypePhysicalVariation dataTypePhysicalVariation = DataTypePhysicalVariation.NONE; - if (explicitTypeNameList != null) { - typeName = explicitTypeNameList.get(c); - dataTypePhysicalVariation = explicitDataTypePhysicalVariationList.get(c); - } else if (onlyOne || allowedTypeNameSet != null) { - typeName = getRandomTypeName(supportedTypes, allowedTypeNameSet); + if (onlyOne) { + typeName = 
getRandomTypeName(supportedTypes); } else { int typeNum; if (allTypes) { @@ -498,8 +425,7 @@ private void chooseSchema(SupportedTypes supportedTypes, Set allowedType } - String decoratedTypeName = - getDecoratedTypeName(typeName, supportedTypes, allowedTypeNameSet, 0, maxComplexDepth); + String decoratedTypeName = getDecoratedTypeName(typeName, supportedTypes, 0, maxComplexDepth); final TypeInfo typeInfo; try { @@ -509,14 +435,15 @@ private void chooseSchema(SupportedTypes supportedTypes, Set allowedType } typeInfos[c] = typeInfo; - dataTypePhysicalVariations[c] = dataTypePhysicalVariation; final Category category = typeInfo.getCategory(); categories[c] = category; - ObjectInspector objectInspector = getObjectInspector(typeInfo, dataTypePhysicalVariation); + ObjectInspector objectInspector = getObjectInspector(typeInfo); switch (category) { case PRIMITIVE: { final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; + objectInspector = PrimitiveObjectInspectorFactory. + getPrimitiveWritableObjectInspector(primitiveTypeInfo); primitiveTypeInfos[c] = primitiveTypeInfo; PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); primitiveCategories[c] = primitiveCategory; @@ -571,46 +498,27 @@ private void chooseSchema(SupportedTypes supportedTypes, Set allowedType } public Object[] randomPrimitiveRow(int columnCount) { - return randomPrimitiveRow(columnCount, r, primitiveTypeInfos, dataTypePhysicalVariations); + return randomPrimitiveRow(columnCount, r, primitiveTypeInfos); } public static Object[] randomPrimitiveRow(int columnCount, Random r, - PrimitiveTypeInfo[] primitiveTypeInfos, - DataTypePhysicalVariation[] dataTypePhysicalVariations) { + PrimitiveTypeInfo[] primitiveTypeInfos) { final Object row[] = new Object[columnCount]; for (int c = 0; c < columnCount; c++) { - row[c] = randomPrimitiveObject(r, primitiveTypeInfos[c], dataTypePhysicalVariations[c]); + row[c] = randomPrimitiveObject(r, primitiveTypeInfos[c]); } return row; } public static Object[] randomWritablePrimitiveRow(int columnCount, Random r, PrimitiveTypeInfo[] primitiveTypeInfos) { - return randomWritablePrimitiveRow(columnCount, r, primitiveTypeInfos, null); - } - - public static Object[] randomWritablePrimitiveRow(int columnCount, Random r, - PrimitiveTypeInfo[] primitiveTypeInfos, - DataTypePhysicalVariation[] dataTypePhysicalVariations) { final Object row[] = new Object[columnCount]; for (int c = 0; c < columnCount; c++) { final PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[c]; - final DataTypePhysicalVariation dataTypePhysicalVariation = - (dataTypePhysicalVariations != null ? 
- dataTypePhysicalVariations[c] : DataTypePhysicalVariation.NONE); - final ObjectInspector objectInspector; - if (primitiveTypeInfo instanceof DecimalTypeInfo && - dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { - objectInspector = - PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( - TypeInfoFactory.longTypeInfo); - } else { - objectInspector = - PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( - primitiveTypeInfo); - } + final ObjectInspector objectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo); final Object object = randomPrimitiveObject(r, primitiveTypeInfo); row[c] = getWritablePrimitiveObject(primitiveTypeInfo, objectInspector, object); } @@ -667,14 +575,6 @@ public void sort(Object[][] rows) { public static Object getWritablePrimitiveObject(PrimitiveTypeInfo primitiveTypeInfo, ObjectInspector objectInspector, Object object) { - return - getWritablePrimitiveObject( - primitiveTypeInfo, objectInspector, DataTypePhysicalVariation.NONE, object); - } - - public static Object getWritablePrimitiveObject(PrimitiveTypeInfo primitiveTypeInfo, - ObjectInspector objectInspector, DataTypePhysicalVariation dataTypePhysicalVariation, - Object object) { switch (primitiveTypeInfo.getPrimitiveCategory()) { case BOOLEAN: @@ -696,17 +596,17 @@ public static Object getWritablePrimitiveObject(PrimitiveTypeInfo primitiveTypeI case STRING: return ((WritableStringObjectInspector) objectInspector).create((String) object); case CHAR: - { - WritableHiveCharObjectInspector writableCharObjectInspector = - new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo); - return writableCharObjectInspector.create((HiveChar) object); - } + { + WritableHiveCharObjectInspector writableCharObjectInspector = + new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo); + return writableCharObjectInspector.create((HiveChar) object); + } case VARCHAR: - { - WritableHiveVarcharObjectInspector writableVarcharObjectInspector = - new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo); - return writableVarcharObjectInspector.create((HiveVarchar) object); - } + { + WritableHiveVarcharObjectInspector writableVarcharObjectInspector = + new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo); + return writableVarcharObjectInspector.create((HiveVarchar) object); + } case BINARY: return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create((byte[]) object); case TIMESTAMP: @@ -716,55 +616,31 @@ public static Object getWritablePrimitiveObject(PrimitiveTypeInfo primitiveTypeI case INTERVAL_DAY_TIME: return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create((HiveIntervalDayTime) object); case DECIMAL: - { - if (dataTypePhysicalVariation == dataTypePhysicalVariation.DECIMAL_64) { - final long value; - if (object instanceof HiveDecimal) { - DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo; - value = new HiveDecimalWritable((HiveDecimal) object).serialize64( - decimalTypeInfo.getScale()); - } else { - value = (long) object; - } - return ((WritableLongObjectInspector) objectInspector).create(value); - } else { - WritableHiveDecimalObjectInspector writableDecimalObjectInspector = - new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo); - return writableDecimalObjectInspector.create((HiveDecimal) object); - } - } + { + WritableHiveDecimalObjectInspector writableDecimalObjectInspector = + new 
WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo); + return writableDecimalObjectInspector.create((HiveDecimal) object); + } default: throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory()); } } public Object randomWritable(int column) { - return randomWritable( - typeInfos[column], objectInspectorList.get(column), dataTypePhysicalVariations[column], - allowNull); + return randomWritable(typeInfos[column], objectInspectorList.get(column)); } public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector) { - return randomWritable(typeInfo, objectInspector, DataTypePhysicalVariation.NONE, allowNull); - } - - public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, - boolean allowNull) { - return randomWritable(typeInfo, objectInspector, DataTypePhysicalVariation.NONE, allowNull); + return randomWritable(typeInfo, objectInspector, allowNull); } - public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, - DataTypePhysicalVariation dataTypePhysicalVariation, boolean allowNull) { + public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, boolean allowNull) { switch (typeInfo.getCategory()) { case PRIMITIVE: { - if (allowNull && r.nextInt(20) == 0) { - return null; - } final Object object = randomPrimitiveObject(r, (PrimitiveTypeInfo) typeInfo); - return getWritablePrimitiveObject( - (PrimitiveTypeInfo) typeInfo, objectInspector, dataTypePhysicalVariation, object); + return getWritablePrimitiveObject((PrimitiveTypeInfo) typeInfo, objectInspector, object); } case LIST: { @@ -904,11 +780,6 @@ public Object randomPrimitiveObject(int column) { } public static Object randomPrimitiveObject(Random r, PrimitiveTypeInfo primitiveTypeInfo) { - return randomPrimitiveObject(r, primitiveTypeInfo, DataTypePhysicalVariation.NONE); - } - - public static Object randomPrimitiveObject(Random r, PrimitiveTypeInfo primitiveTypeInfo, - DataTypePhysicalVariation dataTypePhysicalVariation) { switch (primitiveTypeInfo.getPrimitiveCategory()) { case BOOLEAN: @@ -942,14 +813,9 @@ public static Object randomPrimitiveObject(Random r, PrimitiveTypeInfo primitive case INTERVAL_DAY_TIME: return getRandIntervalDayTime(r); case DECIMAL: - { - DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo; - HiveDecimal hiveDecimal = getRandHiveDecimal(r, decimalTypeInfo); - if (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { - return new HiveDecimalWritable(hiveDecimal).serialize64(decimalTypeInfo.getScale()); - } - return hiveDecimal; - } + { + return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo); + } default: throw new Error("Unknown primitive category " + primitiveTypeInfo.getCategory()); } @@ -1003,13 +869,7 @@ public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTy sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, scale)); } - HiveDecimal dec = HiveDecimal.create(sb.toString()); - dec = - HiveDecimal.enforcePrecisionScale( - dec, decimalTypeInfo.getPrecision(), decimalTypeInfo.getScale()); - if (dec != null) { - return dec; - } + return HiveDecimal.create(sb.toString()); } } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java deleted file mode 100644 index c52ca19a39..0000000000 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java +++ 
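For context on the simplified DECIMAL branch above: with the DataTypePhysicalVariation/DECIMAL_64 paths removed, random decimal test values are always materialized as a HiveDecimalWritable through a WritableHiveDecimalObjectInspector, never as serialized longs. Below is a minimal sketch of that path, assuming a hive-exec/hive-serde classpath; the class name DecimalWritableSketch and the sample values are made up for illustration and are not part of the patch.

import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class DecimalWritableSketch {
  // Mirrors the shape of the simplified getWritablePrimitiveObject() DECIMAL case above.
  public static Object toWritable(HiveDecimal value, DecimalTypeInfo typeInfo) {
    WritableHiveDecimalObjectInspector oi = new WritableHiveDecimalObjectInspector(typeInfo);
    return oi.create(value);
  }

  public static void main(String[] args) {
    DecimalTypeInfo typeInfo = TypeInfoFactory.getDecimalTypeInfo(10, 4);
    // Prints the writable form of the decimal, e.g. 1234.5678
    System.out.println(toWritable(HiveDecimal.create("1234.5678"), typeInfo));
  }
}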
/dev/null @@ -1,444 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Random; - -import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; -import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; -import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; -import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; -import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.io.LongWritable; - -import junit.framework.Assert; - -import org.junit.Test; - -public class TestVectorIfStatement { - - @Test - public void testBoolean() throws Exception { - Random random = new Random(12882); - - doIfTests(random, "boolean"); - } - - @Test - public void testInt() throws Exception { - Random random = new Random(12882); - - doIfTests(random, "int"); - } - - @Test - public void testBigInt() throws Exception { - Random random = new Random(12882); - - doIfTests(random, "bigint"); - } - - @Test - public void testString() throws Exception { - Random random = new Random(12882); - - doIfTests(random, "string"); - } - - @Test - public void testTimestamp() throws Exception { - Random random = new Random(12882); - - doIfTests(random, "timestamp"); - } - - @Test - public void testDate() throws Exception { - Random random = new Random(12882); - - doIfTests(random, "date"); - } - - @Test - public void testIntervalDayTime() throws Exception { - Random random = new 
Random(12882); - - doIfTests(random, "interval_day_time"); - } - - @Test - public void testIntervalYearMonth() throws Exception { - Random random = new Random(12882); - - doIfTests(random, "interval_year_month"); - } - - @Test - public void testDouble() throws Exception { - Random random = new Random(12882); - - doIfTests(random, "double"); - } - - @Test - public void testChar() throws Exception { - Random random = new Random(12882); - - doIfTests(random, "char(10)"); - } - - @Test - public void testVarchar() throws Exception { - Random random = new Random(12882); - - doIfTests(random, "varchar(15)"); - } - - @Test - public void testBinary() throws Exception { - Random random = new Random(12882); - - doIfTests(random, "binary"); - } - - @Test - public void testDecimalLarge() throws Exception { - Random random = new Random(9300); - - doIfTests(random, "decimal(20,8)"); - } - - @Test - public void testDecimalSmall() throws Exception { - Random random = new Random(12882); - - doIfTests(random, "decimal(10,4)"); - } - - public enum IfStmtTestMode { - ROW_MODE, - ADAPTOR_WHEN, - VECTOR_EXPRESSION; - - static final int count = values().length; - } - - public enum ColumnScalarMode { - COLUMN_COLUMN, - COLUMN_SCALAR, - SCALAR_COLUMN, - SCALAR_SCALAR; - - static final int count = values().length; - } - - private void doIfTests(Random random, String typeName) - throws Exception { - doIfTests(random, typeName, DataTypePhysicalVariation.NONE); - } - - private void doIfTests(Random random, String typeName, - DataTypePhysicalVariation dataTypePhysicalVariation) - throws Exception { - for (ColumnScalarMode columnScalarMode : ColumnScalarMode.values()) { - doIfTestsWithDiffColumnScalar( - random, typeName, columnScalarMode, dataTypePhysicalVariation); - } - } - - private void doIfTestsWithDiffColumnScalar(Random random, String typeName, - ColumnScalarMode columnScalarMode, DataTypePhysicalVariation dataTypePhysicalVariation) - throws Exception { - - TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); - - boolean isDecimal64 = (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64); - final int decimal64Scale = - (isDecimal64 ? ((DecimalTypeInfo) typeInfo).getScale() : 0); - - List explicitTypeNameList = new ArrayList(); - List explicitDataTypePhysicalVariationList = new ArrayList(); - explicitTypeNameList.add("boolean"); - explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); - if (columnScalarMode != ColumnScalarMode.SCALAR_SCALAR) { - explicitTypeNameList.add(typeName); - explicitDataTypePhysicalVariationList.add(dataTypePhysicalVariation); - if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN) { - explicitTypeNameList.add(typeName); - explicitDataTypePhysicalVariationList.add(dataTypePhysicalVariation); - } - } - - VectorRandomRowSource rowSource = new VectorRandomRowSource(); - - rowSource.initExplicitSchema( - random, explicitTypeNameList, /* maxComplexDepth */ 0, /* allowNull */ true, - explicitDataTypePhysicalVariationList); - - List columns = new ArrayList(); - columns.add("col0"); // The boolean predicate. 
- - ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Boolean.class, "col0", "table", false); - int columnNum = 1; - ExprNodeDesc col2Expr; - if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || - columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) { - String columnName = "col" + (columnNum++); - col2Expr = new ExprNodeColumnDesc(typeInfo, columnName, "table", false); - columns.add(columnName); - } else { - Object scalar1Object = - VectorRandomRowSource.randomPrimitiveObject( - random, (PrimitiveTypeInfo) typeInfo); - col2Expr = new ExprNodeConstantDesc(typeInfo, scalar1Object); - } - ExprNodeDesc col3Expr; - if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || - columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) { - String columnName = "col" + (columnNum++); - col3Expr = new ExprNodeColumnDesc(typeInfo, columnName, "table", false); - columns.add(columnName); - } else { - Object scalar2Object = - VectorRandomRowSource.randomPrimitiveObject( - random, (PrimitiveTypeInfo) typeInfo); - col3Expr = new ExprNodeConstantDesc(typeInfo, scalar2Object); - } - - List children = new ArrayList(); - children.add(col1Expr); - children.add(col2Expr); - children.add(col3Expr); - - //---------------------------------------------------------------------------------------------- - - String[] columnNames = columns.toArray(new String[0]); - - String[] outputScratchTypeNames = new String[] { typeName }; - DataTypePhysicalVariation[] outputDataTypePhysicalVariations = - new DataTypePhysicalVariation[] { dataTypePhysicalVariation }; - - VectorizedRowBatchCtx batchContext = - new VectorizedRowBatchCtx( - columnNames, - rowSource.typeInfos(), - rowSource.dataTypePhysicalVariations(), - /* dataColumnNums */ null, - /* partitionColumnCount */ 0, - /* virtualColumnCount */ 0, - /* neededVirtualColumns */ null, - outputScratchTypeNames, - outputDataTypePhysicalVariations); - - Object[][] randomRows = rowSource.randomRows(100000); - - VectorRandomBatchSource batchSource = - VectorRandomBatchSource.createInterestingBatches( - random, - rowSource, - randomRows, - null); - - final int rowCount = randomRows.length; - Object[][] resultObjectsArray = new Object[IfStmtTestMode.count][]; - for (int i = 0; i < IfStmtTestMode.count; i++) { - - Object[] resultObjects = new Object[rowCount]; - resultObjectsArray[i] = resultObjects; - - IfStmtTestMode ifStmtTestMode = IfStmtTestMode.values()[i]; - switch (ifStmtTestMode) { - case ROW_MODE: - doRowIfTest( - typeInfo, columns, children, randomRows, rowSource.rowStructObjectInspector(), - resultObjects); - break; - case ADAPTOR_WHEN: - case VECTOR_EXPRESSION: - doVectorIfTest( - typeInfo, - columns, - rowSource.typeInfos(), - rowSource.dataTypePhysicalVariations(), - children, - ifStmtTestMode, - columnScalarMode, - batchSource, - batchContext, - resultObjects); - break; - default: - throw new RuntimeException("Unexpected IF statement test mode " + ifStmtTestMode); - } - } - - for (int i = 0; i < rowCount; i++) { - // Row-mode is the expected value. 
- Object expectedResult = resultObjectsArray[0][i]; - - for (int v = 1; v < IfStmtTestMode.count; v++) { - Object vectorResult = resultObjectsArray[v][i]; - if (expectedResult == null || vectorResult == null) { - if (expectedResult != null || vectorResult != null) { - Assert.fail( - "Row " + i + " " + IfStmtTestMode.values()[v] + - " " + columnScalarMode + - " result is NULL " + (vectorResult == null) + - " does not match row-mode expected result is NULL " + (expectedResult == null)); - } - } else { - - if (isDecimal64 && expectedResult instanceof LongWritable) { - - HiveDecimalWritable expectedHiveDecimalWritable = new HiveDecimalWritable(0); - expectedHiveDecimalWritable.deserialize64( - ((LongWritable) expectedResult).get(), decimal64Scale); - expectedResult = expectedHiveDecimalWritable; - } - - if (!expectedResult.equals(vectorResult)) { - Assert.fail( - "Row " + i + " " + IfStmtTestMode.values()[v] + - " " + columnScalarMode + - " result " + vectorResult.toString() + - " (" + vectorResult.getClass().getSimpleName() + ")" + - " does not match row-mode expected result " + expectedResult.toString() + - " (" + expectedResult.getClass().getSimpleName() + ")"); - } - } - } - } - } - - private void doRowIfTest(TypeInfo typeInfo, List columns, List children, - Object[][] randomRows, ObjectInspector rowInspector, Object[] resultObjects) throws Exception { - - GenericUDF udf = new GenericUDFIf(); - - ExprNodeGenericFuncDesc exprDesc = - new ExprNodeGenericFuncDesc(typeInfo, udf, children); - HiveConf hiveConf = new HiveConf(); - ExprNodeEvaluator evaluator = - ExprNodeEvaluatorFactory.get(exprDesc, hiveConf); - evaluator.initialize(rowInspector); - - final int rowCount = randomRows.length; - for (int i = 0; i < rowCount; i++) { - Object[] row = randomRows[i]; - Object result = evaluator.evaluate(row); - resultObjects[i] = result; - } - } - - private void extractResultObjects(VectorizedRowBatch batch, int rowIndex, - VectorExtractRow resultVectorExtractRow, Object[] scrqtchRow, Object[] resultObjects) { - // UNDONE: selectedInUse - for (int i = 0; i < batch.size; i++) { - resultVectorExtractRow.extractRow(batch, i, scrqtchRow); - - // UNDONE: Need to copy the object. - resultObjects[rowIndex++] = scrqtchRow[0]; - } - } - - private void doVectorIfTest(TypeInfo typeInfo, - List columns, - TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, - List children, - IfStmtTestMode ifStmtTestMode, ColumnScalarMode columnScalarMode, - VectorRandomBatchSource batchSource, VectorizedRowBatchCtx batchContext, - Object[] resultObjects) - throws Exception { - - GenericUDF udf; - switch (ifStmtTestMode) { - case VECTOR_EXPRESSION: - udf = new GenericUDFIf(); - break; - case ADAPTOR_WHEN: - udf = new GenericUDFWhen(); - break; - default: - throw new RuntimeException("Unexpected IF statement test mode " + ifStmtTestMode); - } - - ExprNodeGenericFuncDesc exprDesc = - new ExprNodeGenericFuncDesc(typeInfo, udf, children); - - String ifExprMode = (ifStmtTestMode != IfStmtTestMode.VECTOR_EXPRESSION ? 
"adaptor" : "good"); - HiveConf hiveConf = new HiveConf(); - hiveConf.setVar(HiveConf.ConfVars.HIVE_VECTORIZED_IF_EXPR_MODE, ifExprMode); - - VectorizationContext vectorizationContext = - new VectorizationContext( - "name", - columns, - Arrays.asList(typeInfos), - Arrays.asList(dataTypePhysicalVariations), - hiveConf); - VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc); - - VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); - - VectorExtractRow resultVectorExtractRow = new VectorExtractRow(); - resultVectorExtractRow.init(new TypeInfo[] { typeInfo }, new int[] { columns.size() }); - Object[] scrqtchRow = new Object[1]; - - /* - System.out.println( - "*DEBUG* typeInfo " + typeInfo.toString() + - " ifStmtTestMode " + ifStmtTestMode + - " columnScalarMode " + columnScalarMode + - " vectorExpression " + vectorExpression.getClass().getSimpleName()); - */ - - batchSource.resetBatchIteration(); - int rowIndex = 0; - while (true) { - if (!batchSource.fillNextBatch(batch)) { - break; - } - vectorExpression.evaluate(batch); - extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow, resultObjects); - rowIndex += batch.size; - } - } -} diff --git ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java deleted file mode 100644 index bcb7a88258..0000000000 --- ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java +++ /dev/null @@ -1,815 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.io.arrow; - -import com.google.common.base.Joiner; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; -import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; -import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.AbstractSerDe; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.SerDeUtils; -import org.apache.hadoop.hive.serde2.io.ByteWritable; -import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.io.HiveCharWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; -import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; -import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; -import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.io.BooleanWritable; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.FloatWritable; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; -import org.junit.Before; -import org.junit.Test; - -import java.sql.Timestamp; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Properties; -import java.util.Random; -import java.util.Set; - -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -public class TestArrowColumnarBatchSerDe { - private Configuration conf; - - private final static Object[][] INTEGER_ROWS = { - {byteW(0), shortW(0), intW(0), longW(0)}, - {byteW(1), shortW(1), intW(1), longW(1)}, - {byteW(-1), shortW(-1), intW(-1), longW(-1)}, - {byteW(Byte.MIN_VALUE), shortW(Short.MIN_VALUE), intW(Integer.MIN_VALUE), - longW(Long.MIN_VALUE)}, - {byteW(Byte.MAX_VALUE), shortW(Short.MAX_VALUE), intW(Integer.MAX_VALUE), - longW(Long.MAX_VALUE)}, - {null, null, null, null}, - }; - - private final static Object[][] FLOAT_ROWS = { - {floatW(0f), doubleW(0d)}, - {floatW(1f), doubleW(1d)}, - {floatW(-1f), doubleW(-1d)}, - {floatW(Float.MIN_VALUE), doubleW(Double.MIN_VALUE)}, - {floatW(-Float.MIN_VALUE), doubleW(-Double.MIN_VALUE)}, - {floatW(Float.MAX_VALUE), doubleW(Double.MAX_VALUE)}, - {floatW(-Float.MAX_VALUE), doubleW(-Double.MAX_VALUE)}, - {floatW(Float.POSITIVE_INFINITY), doubleW(Double.POSITIVE_INFINITY)}, - 
{floatW(Float.NEGATIVE_INFINITY), doubleW(Double.NEGATIVE_INFINITY)}, - {null, null}, - }; - - private final static Object[][] STRING_ROWS = { - {text(""), charW("", 10), varcharW("", 10)}, - {text("Hello"), charW("Hello", 10), varcharW("Hello", 10)}, - {text("world!"), charW("world!", 10), varcharW("world!", 10)}, - {null, null, null}, - }; - - private final static long NOW = System.currentTimeMillis(); - private final static Object[][] DTI_ROWS = { - { - new DateWritable(DateWritable.millisToDays(NOW)), - new TimestampWritable(new Timestamp(NOW)), - new HiveIntervalYearMonthWritable(new HiveIntervalYearMonth(1, 2)), - new HiveIntervalDayTimeWritable(new HiveIntervalDayTime(1, 2, 3, 4, 5_000_000)) - }, - {null, null, null, null}, - }; - - private final static Object[][] DECIMAL_ROWS = { - {decimalW(HiveDecimal.ZERO)}, - {decimalW(HiveDecimal.ONE)}, - {decimalW(HiveDecimal.ONE.negate())}, - {decimalW(HiveDecimal.create("0.000001"))}, - {decimalW(HiveDecimal.create("100000"))}, - {null}, - }; - - private final static Object[][] BOOLEAN_ROWS = { - {new BooleanWritable(true)}, - {new BooleanWritable(false)}, - {null}, - }; - - private final static Object[][] BINARY_ROWS = { - {new BytesWritable("".getBytes())}, - {new BytesWritable("Hello".getBytes())}, - {new BytesWritable("world!".getBytes())}, - {null}, - }; - - @Before - public void setUp() { - conf = new Configuration(); - } - - private static ByteWritable byteW(int value) { - return new ByteWritable((byte) value); - } - - private static ShortWritable shortW(int value) { - return new ShortWritable((short) value); - } - - private static IntWritable intW(int value) { - return new IntWritable(value); - } - - private static LongWritable longW(long value) { - return new LongWritable(value); - } - - private static FloatWritable floatW(float value) { - return new FloatWritable(value); - } - - private static DoubleWritable doubleW(double value) { - return new DoubleWritable(value); - } - - private static Text text(String value) { - return new Text(value); - } - - private static HiveCharWritable charW(String value, int length) { - return new HiveCharWritable(new HiveChar(value, length)); - } - - private static HiveVarcharWritable varcharW(String value, int length) { - return new HiveVarcharWritable(new HiveVarchar(value, length)); - } - - private static HiveDecimalWritable decimalW(HiveDecimal value) { - return new HiveDecimalWritable(value); - } - - private void initAndSerializeAndDeserialize(String[][] schema, Object[][] rows) throws SerDeException { - AbstractSerDe serDe = new ArrowColumnarBatchSerDe(); - StructObjectInspector rowOI = initSerDe(serDe, schema); - serializeAndDeserialize(serDe, rows, rowOI); - } - - private StructObjectInspector initSerDe(AbstractSerDe serDe, String[][] schema) - throws SerDeException { - List fieldNameList = newArrayList(); - List fieldTypeList = newArrayList(); - List typeInfoList = newArrayList(); - - for (String[] nameAndType : schema) { - String name = nameAndType[0]; - String type = nameAndType[1]; - fieldNameList.add(name); - fieldTypeList.add(type); - typeInfoList.add(TypeInfoUtils.getTypeInfoFromTypeString(type)); - } - - String fieldNames = Joiner.on(',').join(fieldNameList); - String fieldTypes = Joiner.on(',').join(fieldTypeList); - - Properties schemaProperties = new Properties(); - schemaProperties.setProperty(serdeConstants.LIST_COLUMNS, fieldNames); - schemaProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes); - SerDeUtils.initializeSerDe(serDe, conf, schemaProperties, null); - 
return (StructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( - TypeInfoFactory.getStructTypeInfo(fieldNameList, typeInfoList)); - } - - private void serializeAndDeserialize(AbstractSerDe serDe, Object[][] rows, - StructObjectInspector rowOI) throws SerDeException { - Writable serialized = null; - for (Object[] row : rows) { - serialized = serDe.serialize(row, rowOI); - } - // Pass null to complete a batch - if (serialized == null) { - serialized = serDe.serialize(null, rowOI); - } - final Object[][] deserializedRows = (Object[][]) serDe.deserialize(serialized); - - for (int rowIndex = 0; rowIndex < Math.min(deserializedRows.length, rows.length); rowIndex++) { - final Object[] row = rows[rowIndex]; - final Object[] deserializedRow = deserializedRows[rowIndex]; - assertEquals(row.length, deserializedRow.length); - - final List fields = rowOI.getAllStructFieldRefs(); - for (int fieldIndex = 0; fieldIndex < fields.size(); fieldIndex++) { - final StructField field = fields.get(fieldIndex); - final ObjectInspector fieldObjInspector = field.getFieldObjectInspector(); - switch (fieldObjInspector.getCategory()) { - case PRIMITIVE: - final PrimitiveObjectInspector primitiveObjInspector = - (PrimitiveObjectInspector) fieldObjInspector; - switch (primitiveObjInspector.getPrimitiveCategory()) { - case STRING: - case VARCHAR: - case CHAR: - assertEquals(Objects.toString(row[fieldIndex]), - Objects.toString(deserializedRow[fieldIndex])); - break; - default: - assertEquals(row[fieldIndex], deserializedRow[fieldIndex]); - break; - } - break; - case STRUCT: - final Object[] rowStruct = (Object[]) row[fieldIndex]; - final List deserializedRowStruct = (List) deserializedRow[fieldIndex]; - assertArrayEquals(rowStruct, deserializedRowStruct.toArray()); - break; - case LIST: - case UNION: - assertEquals(row[fieldIndex], deserializedRow[fieldIndex]); - break; - case MAP: - Map rowMap = (Map) row[fieldIndex]; - Map deserializedRowMap = (Map) deserializedRow[fieldIndex]; - Set rowMapKeySet = rowMap.keySet(); - Set deserializedRowMapKeySet = deserializedRowMap.keySet(); - assertTrue(rowMapKeySet.containsAll(deserializedRowMapKeySet)); - assertTrue(deserializedRowMapKeySet.containsAll(rowMapKeySet)); - for (Object key : rowMapKeySet) { - assertEquals(rowMap.get(key), deserializedRowMap.get(key)); - } - break; - } - } - } - } - - @Test - public void testComprehensive() throws SerDeException { - String[][] schema = { - {"datatypes.c1", "int"}, - {"datatypes.c2", "boolean"}, - {"datatypes.c3", "double"}, - {"datatypes.c4", "string"}, - {"datatypes.c5", "array"}, - {"datatypes.c6", "map"}, - {"datatypes.c7", "map"}, - {"datatypes.c8", "struct"}, - {"datatypes.c9", "tinyint"}, - {"datatypes.c10", "smallint"}, - {"datatypes.c11", "float"}, - {"datatypes.c12", "bigint"}, - {"datatypes.c13", "array>"}, - {"datatypes.c14", "map>"}, - {"datatypes.c15", "struct>"}, - {"datatypes.c16", "array,n:int>>"}, - {"datatypes.c17", "timestamp"}, - {"datatypes.c18", "decimal(16,7)"}, - {"datatypes.c19", "binary"}, - {"datatypes.c20", "date"}, - {"datatypes.c21", "varchar(20)"}, - {"datatypes.c22", "char(15)"}, - {"datatypes.c23", "binary"}, - }; - - Object[][] comprehensiveRows = { - { - intW(0), // c1:int - new BooleanWritable(false), // c2:boolean - doubleW(0), // c3:double - text("Hello"), // c4:string - newArrayList(intW(0), intW(1), intW(2)), // c5:array - Maps.toMap( - newArrayList(intW(0), intW(1), intW(2)), - input -> text("Number " + input)), // c6:map - Maps.toMap( - newArrayList(text("apple"), 
text("banana"), text("carrot")), - input -> text(input.toString().toUpperCase())), // c7:map - new Object[] {text("0"), intW(1), doubleW(2)}, // c8:struct - byteW(0), // c9:tinyint - shortW(0), // c10:smallint - floatW(0), // c11:float - longW(0), // c12:bigint - newArrayList( - newArrayList(text("a"), text("b"), text("c")), - newArrayList(text("A"), text("B"), text("C"))), // c13:array> - Maps.toMap( - newArrayList(intW(0), intW(1), intW(2)), - x -> Maps.toMap( - newArrayList(x, intW(x.get() * 2)), - y -> y)), // c14:map> - new Object[] { - intW(0), - newArrayList( - intW(1), - text("Hello"))}, // c15:struct> - Collections.singletonList( - newArrayList( - Maps.toMap( - newArrayList(text("hello")), - input -> text(input.toString().toUpperCase())), - intW(0))), // c16:array,n:int>> - new TimestampWritable(new Timestamp(NOW)), // c17:timestamp - decimalW(HiveDecimal.create(0, 0)), // c18:decimal(16,7) - new BytesWritable("Hello".getBytes()), // c19:binary - new DateWritable(123), // c20:date - varcharW("x", 20), // c21:varchar(20) - charW("y", 15), // c22:char(15) - new BytesWritable("world!".getBytes()), // c23:binary - }, - }; - - initAndSerializeAndDeserialize(schema, comprehensiveRows); - } - - private List newArrayList(E ... elements) { - return Lists.newArrayList(elements); - } - - @Test - public void testPrimitiveInteger() throws SerDeException { - String[][] schema = { - {"tinyint1", "tinyint"}, - {"smallint1", "smallint"}, - {"int1", "int"}, - {"bigint1", "bigint"} - }; - - initAndSerializeAndDeserialize(schema, INTEGER_ROWS); - } - - @Test - public void testPrimitiveBigInt10000() throws SerDeException { - String[][] schema = { - {"bigint1", "bigint"} - }; - - final int batchSize = 1000; - final Object[][] integerRows = new Object[batchSize][]; - final AbstractSerDe serDe = new ArrowColumnarBatchSerDe(); - StructObjectInspector rowOI = initSerDe(serDe, schema); - - for (int j = 0; j < 10; j++) { - for (int i = 0; i < batchSize; i++) { - integerRows[i] = new Object[] {longW(i + j * batchSize)}; - } - - serializeAndDeserialize(serDe, integerRows, rowOI); - } - } - - @Test - public void testPrimitiveBigIntRandom() { - try { - String[][] schema = { - {"bigint1", "bigint"} - }; - - final AbstractSerDe serDe = new ArrowColumnarBatchSerDe(); - StructObjectInspector rowOI = initSerDe(serDe, schema); - - final Random random = new Random(); - for (int j = 0; j < 1000; j++) { - final int batchSize = random.nextInt(1000); - final Object[][] integerRows = new Object[batchSize][]; - for (int i = 0; i < batchSize; i++) { - integerRows[i] = new Object[] {longW(random.nextLong())}; - } - - serializeAndDeserialize(serDe, integerRows, rowOI); - } - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - @Test - public void testPrimitiveFloat() throws SerDeException { - String[][] schema = { - {"float1", "float"}, - {"double1", "double"}, - }; - - initAndSerializeAndDeserialize(schema, FLOAT_ROWS); - } - - @Test(expected = AssertionError.class) - public void testPrimitiveFloatNaN() throws SerDeException { - String[][] schema = { - {"float1", "float"}, - }; - - Object[][] rows = {{new FloatWritable(Float.NaN)}}; - - initAndSerializeAndDeserialize(schema, rows); - } - - @Test(expected = AssertionError.class) - public void testPrimitiveDoubleNaN() throws SerDeException { - String[][] schema = { - {"double1", "double"}, - }; - - Object[][] rows = {{new DoubleWritable(Double.NaN)}}; - - initAndSerializeAndDeserialize(schema, rows); - } - - @Test - public void testPrimitiveString() throws 
SerDeException { - String[][] schema = { - {"string1", "string"}, - {"char1", "char(10)"}, - {"varchar1", "varchar(10)"}, - }; - - initAndSerializeAndDeserialize(schema, STRING_ROWS); - } - - @Test - public void testPrimitiveDTI() throws SerDeException { - String[][] schema = { - {"date1", "date"}, - {"timestamp1", "timestamp"}, - {"interval_year_month1", "interval_year_month"}, - {"interval_day_time1", "interval_day_time"}, - }; - - initAndSerializeAndDeserialize(schema, DTI_ROWS); - } - - @Test - public void testPrimitiveDecimal() throws SerDeException { - String[][] schema = { - {"decimal1", "decimal(38,10)"}, - }; - - initAndSerializeAndDeserialize(schema, DECIMAL_ROWS); - } - - @Test - public void testPrimitiveBoolean() throws SerDeException { - String[][] schema = { - {"boolean1", "boolean"}, - }; - - initAndSerializeAndDeserialize(schema, BOOLEAN_ROWS); - } - - @Test - public void testPrimitiveBinary() throws SerDeException { - String[][] schema = { - {"binary1", "binary"}, - }; - - initAndSerializeAndDeserialize(schema, BINARY_ROWS); - } - - private List[][] toList(Object[][] rows) { - List[][] array = new List[rows.length][]; - for (int rowIndex = 0; rowIndex < rows.length; rowIndex++) { - Object[] row = rows[rowIndex]; - array[rowIndex] = new List[row.length]; - for (int fieldIndex = 0; fieldIndex < row.length; fieldIndex++) { - array[rowIndex][fieldIndex] = newArrayList(row[fieldIndex]); - } - } - return array; - } - - @Test - public void testListInteger() throws SerDeException { - String[][] schema = { - {"tinyint_list", "array"}, - {"smallint_list", "array"}, - {"int_list", "array"}, - {"bigint_list", "array"}, - }; - - initAndSerializeAndDeserialize(schema, toList(INTEGER_ROWS)); - } - - @Test - public void testListFloat() throws SerDeException { - String[][] schema = { - {"float_list", "array"}, - {"double_list", "array"}, - }; - - initAndSerializeAndDeserialize(schema, toList(FLOAT_ROWS)); - } - - @Test - public void testListString() throws SerDeException { - String[][] schema = { - {"string_list", "array"}, - {"char_list", "array"}, - {"varchar_list", "array"}, - }; - - initAndSerializeAndDeserialize(schema, toList(STRING_ROWS)); - } - - @Test - public void testListDTI() throws SerDeException { - String[][] schema = { - {"date_list", "array"}, - {"timestamp_list", "array"}, - {"interval_year_month_list", "array"}, - {"interval_day_time_list", "array"}, - }; - - initAndSerializeAndDeserialize(schema, toList(DTI_ROWS)); - } - - @Test - public void testListBoolean() throws SerDeException { - String[][] schema = { - {"boolean_list", "array"}, - }; - - initAndSerializeAndDeserialize(schema, toList(BOOLEAN_ROWS)); - } - - @Test - public void testListBinary() throws SerDeException { - String[][] schema = { - {"binary_list", "array"}, - }; - - initAndSerializeAndDeserialize(schema, toList(BINARY_ROWS)); - } - - private StandardUnionObjectInspector.StandardUnion union(int tag, Object object) { - return new StandardUnionObjectInspector.StandardUnion((byte) tag, object); - } - - public void testUnionInteger() throws SerDeException { - String[][] schema = { - {"int_union", "uniontype"}, - }; - - StandardUnionObjectInspector.StandardUnion[][] integerUnions = { - {union(0, byteW(0))}, - {union(1, shortW(1))}, - {union(2, intW(2))}, - {union(3, longW(3))}, - }; - - initAndSerializeAndDeserialize(schema, integerUnions); - } - - public void testUnionFloat() throws SerDeException { - String[][] schema = { - {"float_union", "uniontype"}, - }; - - 
StandardUnionObjectInspector.StandardUnion[][] floatUnions = { - {union(0, floatW(0f))}, - {union(1, doubleW(1d))}, - }; - - initAndSerializeAndDeserialize(schema, floatUnions); - } - - public void testUnionString() throws SerDeException { - String[][] schema = { - {"string_union", "uniontype"}, - }; - - StandardUnionObjectInspector.StandardUnion[][] stringUnions = { - {union(0, text("Hello"))}, - {union(1, intW(1))}, - }; - - initAndSerializeAndDeserialize(schema, stringUnions); - } - - public void testUnionChar() throws SerDeException { - String[][] schema = { - {"char_union", "uniontype"}, - }; - - StandardUnionObjectInspector.StandardUnion[][] charUnions = { - {union(0, charW("Hello", 10))}, - {union(1, intW(1))}, - }; - - initAndSerializeAndDeserialize(schema, charUnions); - } - - public void testUnionVarchar() throws SerDeException { - String[][] schema = { - {"varchar_union", "uniontype"}, - }; - - StandardUnionObjectInspector.StandardUnion[][] varcharUnions = { - {union(0, varcharW("Hello", 10))}, - {union(1, intW(1))}, - }; - - initAndSerializeAndDeserialize(schema, varcharUnions); - } - - public void testUnionDTI() throws SerDeException { - String[][] schema = { - {"date_union", "uniontype"}, - }; - long NOW = System.currentTimeMillis(); - - StandardUnionObjectInspector.StandardUnion[][] dtiUnions = { - {union(0, new DateWritable(DateWritable.millisToDays(NOW)))}, - {union(1, new TimestampWritable(new Timestamp(NOW)))}, - {union(2, new HiveIntervalYearMonthWritable(new HiveIntervalYearMonth(1, 2)))}, - {union(3, new HiveIntervalDayTimeWritable(new HiveIntervalDayTime(1, 2, 3, 4, 5_000_000)))}, - }; - - initAndSerializeAndDeserialize(schema, dtiUnions); - } - - public void testUnionBooleanBinary() throws SerDeException { - String[][] schema = { - {"boolean_union", "uniontype"}, - }; - - StandardUnionObjectInspector.StandardUnion[][] booleanBinaryUnions = { - {union(0, new BooleanWritable(true))}, - {union(1, new BytesWritable("Hello".getBytes()))}, - }; - - initAndSerializeAndDeserialize(schema, booleanBinaryUnions); - } - - private Object[][][] toStruct(Object[][] rows) { - Object[][][] struct = new Object[rows.length][][]; - for (int rowIndex = 0; rowIndex < rows.length; rowIndex++) { - Object[] row = rows[rowIndex]; - struct[rowIndex] = new Object[][] {row}; - } - return struct; - } - - @Test - public void testStructInteger() throws SerDeException { - String[][] schema = { - {"int_struct", "struct"}, - }; - - initAndSerializeAndDeserialize(schema, toStruct(INTEGER_ROWS)); - } - - @Test - public void testStructFloat() throws SerDeException { - String[][] schema = { - {"float_struct", "struct"}, - }; - - initAndSerializeAndDeserialize(schema, toStruct(FLOAT_ROWS)); - } - - @Test - public void testStructString() throws SerDeException { - String[][] schema = { - {"string_struct", "struct"}, - }; - - initAndSerializeAndDeserialize(schema, toStruct(STRING_ROWS)); - } - - @Test - public void testStructDTI() throws SerDeException { - String[][] schema = { - {"date_struct", "struct"}, - }; - - initAndSerializeAndDeserialize(schema, toStruct(DTI_ROWS)); - } - - @Test - public void testStructBoolean() throws SerDeException { - String[][] schema = { - {"boolean_struct", "struct"}, - }; - - initAndSerializeAndDeserialize(schema, toStruct(BOOLEAN_ROWS)); - } - - @Test - public void testStructBinary() throws SerDeException { - String[][] schema = { - {"binary_struct", "struct"}, - }; - - initAndSerializeAndDeserialize(schema, toStruct(BINARY_ROWS)); - } - - private Object[][] toMap(Object[][] 
rows) { - Map[][] array = new Map[rows.length][]; - for (int rowIndex = 0; rowIndex < rows.length; rowIndex++) { - Object[] row = rows[rowIndex]; - array[rowIndex] = new Map[row.length]; - for (int fieldIndex = 0; fieldIndex < row.length; fieldIndex++) { - Map map = Maps.newHashMap(); - map.put(new Text(String.valueOf(row[fieldIndex])), row[fieldIndex]); - array[rowIndex][fieldIndex] = map; - } - } - return array; - } - - @Test - public void testMapInteger() throws SerDeException { - String[][] schema = { - {"tinyint_map", "map"}, - {"smallint_map", "map"}, - {"int_map", "map"}, - {"bigint_map", "map"}, - }; - - initAndSerializeAndDeserialize(schema, toMap(INTEGER_ROWS)); - } - - @Test - public void testMapFloat() throws SerDeException { - String[][] schema = { - {"float_map", "map"}, - {"double_map", "map"}, - }; - - initAndSerializeAndDeserialize(schema, toMap(FLOAT_ROWS)); - } - - @Test - public void testMapString() throws SerDeException { - String[][] schema = { - {"string_map", "map"}, - {"char_map", "map"}, - {"varchar_map", "map"}, - }; - - initAndSerializeAndDeserialize(schema, toMap(STRING_ROWS)); - } - - @Test - public void testMapDTI() throws SerDeException { - String[][] schema = { - {"date_map", "map"}, - {"timestamp_map", "map"}, - {"interval_year_month_map", "map"}, - {"interval_day_time_map", "map"}, - }; - - initAndSerializeAndDeserialize(schema, toMap(DTI_ROWS)); - } - - @Test - public void testMapBoolean() throws SerDeException { - String[][] schema = { - {"boolean_map", "map"}, - }; - - initAndSerializeAndDeserialize(schema, toMap(BOOLEAN_ROWS)); - } - - @Test - public void testMapBinary() throws SerDeException { - String[][] schema = { - {"binary_map", "map"}, - }; - - initAndSerializeAndDeserialize(schema, toMap(BINARY_ROWS)); - } -} diff --git ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java index e06f0a4f5c..77fe73687a 100644 --- ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java +++ ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java @@ -771,6 +771,35 @@ public void checkExpectedLocks2() throws Exception { conf.setBoolVar(HiveConf.ConfVars.HIVE_TXN_STRICT_LOCKING_MODE, true); } + @Test + public void testLockingOnInsertIntoNonNativeTables() throws Exception { + dropTable(new String[] {"tab_not_acid"}); + checkCmdOnDriver(driver.run("create table if not exists tab_not_acid (a int, b int) " + + " STORED BY 'org.apache.hadoop.hive.ql.metadata.StorageHandlerMock'")); + txnMgr.openTxn(ctx, "T1"); + checkCmdOnDriver(driver.compileAndRespond("insert into tab_not_acid values(1,2)", true)); + + txnMgr.acquireLocks(driver.getPlan(), ctx, "T1"); + List locks = getLocks(txnMgr); + Assert.assertEquals("Unexpected lock count", 2, locks.size()); + checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "tab_not_acid", null, locks); + checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "_dummy_database", "_dummy_table", null, locks); + } + + @Test + public void testLockingOnInsertOverwriteNonNativeTables() throws Exception { + dropTable(new String[] {"tab_not_acid"}); + checkCmdOnDriver(driver.run("create table if not exists tab_not_acid (a int, b int) " + + " STORED BY 'org.apache.hadoop.hive.ql.metadata.StorageHandlerMock'")); + txnMgr.openTxn(ctx, "T1"); + checkCmdOnDriver(driver.compileAndRespond("insert overwrite table tab_not_acid values(1,2)", true)); + txnMgr.acquireLocks(driver.getPlan(), ctx, "T1"); + List locks = getLocks(txnMgr); + 
Assert.assertEquals("Unexpected lock count", 2, locks.size()); + checkLock(LockType.EXCLUSIVE, LockState.ACQUIRED, "default", "tab_not_acid", null, locks); + checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "_dummy_database", "_dummy_table", null, locks); + } + /** The list is small, and the object is generated, so we don't use sets/equals/etc. */ public static ShowLocksResponseElement checkLock(LockType expectedType, LockState expectedState, String expectedDb, String expectedTable, String expectedPartition, List actuals) { diff --git ql/src/test/org/apache/hadoop/hive/ql/metadata/StorageHandlerMock.java ql/src/test/org/apache/hadoop/hive/ql/metadata/StorageHandlerMock.java new file mode 100644 index 0000000000..dbf9b31998 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/metadata/StorageHandlerMock.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.metadata; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hive.metastore.HiveMetaHook; +import org.apache.hadoop.hive.metastore.api.LockType; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.OutputFormat; +import org.apache.hadoop.mapred.RecordWriter; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.util.Progressable; + +import java.io.IOException; + +/** + * Mock class used for unit test + * {@link org.apache.hadoop.hive.ql.lockmgr.TestDbTxnManager2#testLockingOnInsertIntoNonNativeTables()} + */ +public class StorageHandlerMock extends DefaultStorageHandler { + @Override public HiveMetaHook getMetaHook() { + return new HiveMetaHook() { + @Override public void preCreateTable(Table table) throws MetaException { + + } + + @Override public void rollbackCreateTable(Table table) throws MetaException { + + } + + @Override public void commitCreateTable(Table table) throws MetaException { + + } + + @Override public void preDropTable(Table table) throws MetaException { + + } + + @Override public void rollbackDropTable(Table table) throws MetaException { + + } + + @Override public void commitDropTable(Table table, boolean deleteData) throws MetaException { + + } + }; + } + + @Override public LockType getLockType(WriteEntity writeEntity + ) { + if (writeEntity.getWriteType().equals(WriteEntity.WriteType.INSERT)) { + return LockType.SHARED_READ; + } + return LockType.SHARED_WRITE; + } + + @Override public Class getOutputFormatClass() { + return MockOutputFormat.class; + } + + /** + * Dummy no op output format. + */ + public static class MockOutputFormat implements OutputFormat { + + @Override public RecordWriter getRecordWriter(FileSystem fileSystem, JobConf jobConf, String s, + Progressable progressable + ) throws IOException { + return new RecordWriter() { + @Override public void write(Object o, Object o2) throws IOException { + //noop + } + + @Override public void close(Reporter reporter) throws IOException { + + } + }; + } + + @Override public void checkOutputSpecs(FileSystem fileSystem, JobConf jobConf) throws IOException { + + } + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFEnforceConstraint.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFEnforceNotNullConstraint.java similarity index 88% rename from ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFEnforceConstraint.java rename to ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFEnforceNotNullConstraint.java index a0da723e41..fc65bb6e62 100644 --- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFEnforceConstraint.java +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFEnforceNotNullConstraint.java @@ -26,13 +26,13 @@ import org.apache.hadoop.io.BooleanWritable; /** - * Test class for {@link GenericUDFEnforceConstraint}. + * Test class for {@link GenericUDFEnforceNotNullConstraint}. 
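For context on the locking changes above: the two new TestDbTxnManager2 cases drive an INSERT and an INSERT OVERWRITE against a table backed by StorageHandlerMock and assert SHARED_READ versus EXCLUSIVE table locks. The sketch below only mirrors the mock's getLockType() branch, under the assumption that the lock manager escalates overwrite-style writes on its own; the class name NonNativeLockChoice is made up for illustration and is not part of the patch.

import org.apache.hadoop.hive.metastore.api.LockType;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;

final class NonNativeLockChoice {
  // Plain INSERT into the non-native table only needs a shared read lock;
  // every other write type is reported as a shared write by the mock handler.
  static LockType forWriteType(WriteEntity.WriteType writeType) {
    return writeType == WriteEntity.WriteType.INSERT
        ? LockType.SHARED_READ
        : LockType.SHARED_WRITE;
  }
}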
*/ -public class TestGenericUDFEnforceConstraint extends TestCase { +public class TestGenericUDFEnforceNotNullConstraint extends TestCase { public void testNull() throws HiveException { try { - GenericUDFEnforceConstraint udf = new GenericUDFEnforceConstraint(); + GenericUDFEnforceNotNullConstraint udf = new GenericUDFEnforceNotNullConstraint(); ObjectInspector valueOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; ObjectInspector[] arguments = {valueOI }; udf.initialize(arguments); @@ -49,7 +49,7 @@ public void testNull() throws HiveException { public void testInvalidArgumentsLength() throws HiveException { try { - GenericUDFEnforceConstraint udf = new GenericUDFEnforceConstraint(); + GenericUDFEnforceNotNullConstraint udf = new GenericUDFEnforceNotNullConstraint(); ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; ObjectInspector[] arguments = {valueOI1, valueOI2 }; @@ -62,7 +62,7 @@ public void testInvalidArgumentsLength() throws HiveException { } public void testCorrect() throws HiveException { - GenericUDFEnforceConstraint udf = new GenericUDFEnforceConstraint(); + GenericUDFEnforceNotNullConstraint udf = new GenericUDFEnforceNotNullConstraint(); ObjectInspector valueOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; ObjectInspector[] arguments = {valueOI }; udf.initialize(arguments); diff --git ql/src/test/queries/clientpositive/druidkafkamini_basic.q ql/src/test/queries/clientpositive/druidkafkamini_basic.q index 229a20cf81..4c30cdd0ad 100644 --- ql/src/test/queries/clientpositive/druidkafkamini_basic.q +++ ql/src/test/queries/clientpositive/druidkafkamini_basic.q @@ -72,3 +72,4 @@ FROM druid_kafka_test) b ) order by b.`user`; DROP TABLE druid_kafka_test; +DROP TABLE druid_table_1; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/infer_join_preds.q ql/src/test/queries/clientpositive/infer_join_preds.q index 4787de1b43..89827a2d98 100644 --- ql/src/test/queries/clientpositive/infer_join_preds.q +++ ql/src/test/queries/clientpositive/infer_join_preds.q @@ -61,225 +61,3 @@ select * from (select * from src)a right outer join (select * from src1 where 1 = 0)b on a.key = b.key; - -explain select * from src join src1 on src.key = src1.key and src.value = src1.value - where 4 between src.key and src.value; - - CREATE TABLE `table1`( - `idp_warehouse_id` bigint, - `idp_audit_id` bigint, - `idp_effective_date` date, - `idp_end_date` date, - `idp_delete_date` date, - `pruid` varchar(32), - `prid` bigint, - `prtimesheetid` bigint, - `prassignmentid` bigint, - `prchargecodeid` bigint, - `prtypecodeid` bigint, - `prsequence` bigint, - `prmodby` varchar(96), - `prmodtime` timestamp, - `prrmexported` bigint, - `prrmckdel` bigint, - `slice_status` int, - `role_id` bigint, - `user_lov1` varchar(30), - `user_lov2` varchar(30), - `incident_id` bigint, - `incident_investment_id` bigint, - `odf_ss_actuals` bigint, - `practsum` decimal(38,20)); - - CREATE TABLE `table2`( - `idp_warehouse_id` bigint, - `idp_audit_id` bigint, - `idp_effective_date` date, - `idp_end_date` date, - `idp_delete_date` date, - `pruid` varchar(32), - `prid` bigint, - `prtimesheetid` bigint, - `prassignmentid` bigint, - `prchargecodeid` bigint, - `prtypecodeid` bigint, - `prsequence` bigint, - `prmodby` varchar(96), - `prmodtime` timestamp, - `prrmexported` bigint, - `prrmckdel` bigint, - `slice_status` int, - `role_id` bigint, - `user_lov1` 
varchar(30), - `user_lov2` varchar(30), - `incident_id` bigint, - `incident_investment_id` bigint, - `odf_ss_actuals` bigint, - `practsum` decimal(38,20)); - - explain SELECT s.idp_warehouse_id AS source_warehouse_id - FROM table1 s - JOIN - - table2 d - ON ( - s.prid = d.prid ) - JOIN - table2 e - ON - s.prid = e.prid - WHERE - concat( - CASE - WHEN s.prid IS NULL THEN 1 - ELSE s.prid - END,',', - CASE - WHEN s.prtimesheetid IS NULL THEN 1 - ELSE s.prtimesheetid - END,',', - CASE - WHEN s.prassignmentid IS NULL THEN 1 - ELSE s.prassignmentid - END,',', - CASE - WHEN s.prchargecodeid IS NULL THEN 1 - ELSE s.prchargecodeid - END,',', - CASE - WHEN (s.prtypecodeid) IS NULL THEN '' - ELSE s.prtypecodeid - END,',', - CASE - WHEN s.practsum IS NULL THEN 1 - ELSE s.practsum - END,',', - CASE - WHEN s.prsequence IS NULL THEN 1 - ELSE s.prsequence - END,',', - CASE - WHEN length(s.prmodby) IS NULL THEN '' - ELSE s.prmodby - END,',', - CASE - WHEN s.prmodtime IS NULL THEN cast(from_unixtime(unix_timestamp('2017-12-08','yyyy-MM-dd') ) AS timestamp) - ELSE s.prmodtime - END,',', - CASE - WHEN s.prrmexported IS NULL THEN 1 - ELSE s.prrmexported - END,',', - CASE - WHEN s.prrmckdel IS NULL THEN 1 - ELSE s.prrmckdel - END,',', - CASE - WHEN s.slice_status IS NULL THEN 1 - ELSE s.slice_status - END,',', - CASE - WHEN s.role_id IS NULL THEN 1 - ELSE s.role_id - END,',', - CASE - WHEN length(s.user_lov1) IS NULL THEN '' - ELSE s.user_lov1 - END,',', - CASE - WHEN length(s.user_lov2) IS NULL THEN '' - ELSE s.user_lov2 - END,',', - CASE - WHEN s.incident_id IS NULL THEN 1 - ELSE s.incident_id - END,',', - CASE - WHEN s.incident_investment_id IS NULL THEN 1 - ELSE s.incident_investment_id - END,',', - CASE - WHEN s.odf_ss_actuals IS NULL THEN 1 - ELSE s.odf_ss_actuals - END ) != concat( - CASE - WHEN length(d.pruid) IS NULL THEN '' - ELSE d.pruid - END,',', - CASE - WHEN d.prid IS NULL THEN 1 - ELSE d.prid - END,',', - CASE - WHEN d.prtimesheetid IS NULL THEN 1 - ELSE d.prtimesheetid - END,',', - CASE - WHEN d.prassignmentid IS NULL THEN 1 - ELSE d.prassignmentid - END,',', - CASE - WHEN d.prchargecodeid IS NULL THEN 1 - ELSE d.prchargecodeid - END,',', - CASE - WHEN (d.prtypecodeid) IS NULL THEN '' - ELSE d.prtypecodeid - END,',', - CASE - WHEN d.practsum IS NULL THEN 1 - ELSE d.practsum - END,',', - CASE - WHEN d.prsequence IS NULL THEN 1 - ELSE d.prsequence - END,',', - CASE - WHEN length(d.prmodby) IS NULL THEN '' - ELSE d.prmodby - END,',', - CASE - WHEN d.prmodtime IS NULL THEN cast(from_unixtime(unix_timestamp('2017-12-08','yyyy-MM-dd') ) AS timestamp) - ELSE d.prmodtime - END,',', - CASE - WHEN d.prrmexported IS NULL THEN 1 - ELSE d.prrmexported - END,',', - CASE - WHEN d.prrmckdel IS NULL THEN 1 - ELSE d.prrmckdel - END,',', - CASE - WHEN d.slice_status IS NULL THEN 1 - ELSE d.slice_status - END,',', - CASE - WHEN d.role_id IS NULL THEN 1 - ELSE d.role_id - END,',', - CASE - WHEN length(d.user_lov1) IS NULL THEN '' - ELSE d.user_lov1 - END,',', - CASE - WHEN length(d.user_lov2) IS NULL THEN '' - ELSE d.user_lov2 - END,',', - CASE - WHEN d.incident_id IS NULL THEN 1 - ELSE d.incident_id - END,',', - CASE - WHEN d.incident_investment_id IS NULL THEN 1 - ELSE d.incident_investment_id - END,',', - CASE - WHEN d.odf_ss_actuals IS NULL THEN 1 - ELSE d.odf_ss_actuals - END ); - -drop table table2; -drop table table1; - - diff --git ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q index d53fa1e503..c3236c9bf7 100644 --- 
ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q +++ ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q @@ -1,42 +1,23 @@ --! qt:dataset:alltypesorc - set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.fetch.task.conversion=none; -SET hive.vectorized.execution.enabled = false; - -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. -- Turning on vectorization has been temporarily moved after filling the test table -- due to bug HIVE-8197. --- SORT_QUERY_RESULTS -CREATE TABLE alltypesorc_string(cboolean1 boolean, ctimestamp1 timestamp, stimestamp1 string, - ctimestamp2 timestamp) STORED AS ORC; +CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC; INSERT OVERWRITE TABLE alltypesorc_string SELECT - cboolean1, to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS toutc, - CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst, - ctimestamp2 + CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst FROM alltypesorc ORDER BY toutc, cst LIMIT 40; -INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', '1978-08-05 14:41:05.501', '1999-10-03 16:59:10.396903939'); -INSERT INTO TABLE alltypesorc_string values (false, null, '2013-04-10 00:43:46.8547315', null); -INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', null, null); -INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', '0004-09-22 18:26:29.519542222', '1966-08-16 13:36:50.183'); -INSERT INTO TABLE alltypesorc_string values (null, null, '4966-12-04 09:30:55.202', null); -INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', null, null); -INSERT INTO TABLE alltypesorc_string values (true, '1985-07-20 09:30:11.0', '8521-01-16 20:42:05.668832', '1319-02-02 16:31:57.778'); -INSERT INTO TABLE alltypesorc_string values (true, null, '1883-04-17 04:14:34.64776', '2024-11-11 16:42:41.101'); -INSERT INTO TABLE alltypesorc_string values (true, '0528-10-27 08:15:18.941718273', null, null); - -INSERT INTO TABLE alltypesorc_string values - (false, '2021-09-24 03:18:32.4', '1985-11-18 16:37:54.0', '2010-04-08 02:43:35.861742727'), - (true, null, '1985-11-18 16:37:54.0', null), - (null, '2021-09-24 03:18:32.4', null, '1974-10-04 17:21:03.989'); + +SET hive.vectorized.execution.enabled = true; CREATE TABLE alltypesorc_wrong(stimestamp1 string) STORED AS ORC; @@ -44,8 +25,6 @@ INSERT INTO TABLE alltypesorc_wrong SELECT 'abcd' FROM alltypesorc LIMIT 1; INSERT INTO TABLE alltypesorc_wrong SELECT '2000:01:01 00-00-00' FROM alltypesorc LIMIT 1; INSERT INTO TABLE alltypesorc_wrong SELECT '0000-00-00 99:99:99' FROM alltypesorc LIMIT 1; -SET hive.vectorized.execution.enabled = true; - EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), @@ -55,15 +34,7 @@ EXPLAIN VECTORIZATION EXPRESSION SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1), - cboolean1, - ctimestamp1, - ctimestamp2, - if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), - if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), - if (cboolean1, ctimestamp1, ctimestamp2), - if (cboolean1, ctimestamp1, null), - if (cboolean1, null, ctimestamp2) + second(ctimestamp1) FROM alltypesorc_string ORDER BY c1; @@ -76,15 +47,7 @@ SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1), - cboolean1, - 
ctimestamp1, - ctimestamp2, - if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), - if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), - if (cboolean1, ctimestamp1, ctimestamp2), - if (cboolean1, ctimestamp1, null), - if (cboolean1, null, ctimestamp2) + second(ctimestamp1) FROM alltypesorc_string ORDER BY c1; diff --git ql/src/test/results/clientnegative/alter_notnull_constraint_violation.q.out ql/src/test/results/clientnegative/alter_notnull_constraint_violation.q.out index 2445b5de7f..65195dc55b 100644 --- ql/src/test/results/clientnegative/alter_notnull_constraint_violation.q.out +++ ql/src/test/results/clientnegative/alter_notnull_constraint_violation.q.out @@ -24,4 +24,4 @@ POSTHOOK: query: alter table t1 change j j int constraint nn0 not null enforced POSTHOOK: type: ALTERTABLE_RENAMECOL POSTHOOK: Input: default@t1 POSTHOOK: Output: default@t1 -FAILED: DataConstraintViolationError org.apache.hadoop.hive.ql.exec.errors.DataConstraintViolationError: Either CHECK or NOT NULL constraint violated! +FAILED: DataConstraintViolationError org.apache.hadoop.hive.ql.exec.errors.DataConstraintViolationError: NOT NULL constraint violated! diff --git ql/src/test/results/clientnegative/insert_into_acid_notnull.q.out ql/src/test/results/clientnegative/insert_into_acid_notnull.q.out index 777a087878..172c93355f 100644 --- ql/src/test/results/clientnegative/insert_into_acid_notnull.q.out +++ ql/src/test/results/clientnegative/insert_into_acid_notnull.q.out @@ -10,4 +10,4 @@ POSTHOOK: query: create table acid_uami(i int, POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@acid_uami -FAILED: DataConstraintViolationError org.apache.hadoop.hive.ql.exec.errors.DataConstraintViolationError: Either CHECK or NOT NULL constraint violated! +FAILED: DataConstraintViolationError org.apache.hadoop.hive.ql.exec.errors.DataConstraintViolationError: NOT NULL constraint violated! diff --git ql/src/test/results/clientnegative/insert_into_notnull_constraint.q.out ql/src/test/results/clientnegative/insert_into_notnull_constraint.q.out index 96feec0d30..dd720fad75 100644 --- ql/src/test/results/clientnegative/insert_into_notnull_constraint.q.out +++ ql/src/test/results/clientnegative/insert_into_notnull_constraint.q.out @@ -6,4 +6,4 @@ POSTHOOK: query: create table nullConstraintCheck(i int NOT NULL enforced, j int POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@nullConstraintCheck -FAILED: DataConstraintViolationError org.apache.hadoop.hive.ql.exec.errors.DataConstraintViolationError: Either CHECK or NOT NULL constraint violated! +FAILED: DataConstraintViolationError org.apache.hadoop.hive.ql.exec.errors.DataConstraintViolationError: NOT NULL constraint violated! diff --git ql/src/test/results/clientnegative/insert_multi_into_notnull.q.out ql/src/test/results/clientnegative/insert_multi_into_notnull.q.out index 74e112f6cd..1beeb26f95 100644 --- ql/src/test/results/clientnegative/insert_multi_into_notnull.q.out +++ ql/src/test/results/clientnegative/insert_multi_into_notnull.q.out @@ -14,4 +14,4 @@ POSTHOOK: query: create table src_multi2 (i STRING, j STRING NOT NULL ENFORCED) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@src_multi2 -FAILED: DataConstraintViolationError org.apache.hadoop.hive.ql.exec.errors.DataConstraintViolationError: Either CHECK or NOT NULL constraint violated! 
+FAILED: DataConstraintViolationError org.apache.hadoop.hive.ql.exec.errors.DataConstraintViolationError: NOT NULL constraint violated! diff --git ql/src/test/results/clientnegative/insert_overwrite_notnull_constraint.q.out ql/src/test/results/clientnegative/insert_overwrite_notnull_constraint.q.out index 96feec0d30..dd720fad75 100644 --- ql/src/test/results/clientnegative/insert_overwrite_notnull_constraint.q.out +++ ql/src/test/results/clientnegative/insert_overwrite_notnull_constraint.q.out @@ -6,4 +6,4 @@ POSTHOOK: query: create table nullConstraintCheck(i int NOT NULL enforced, j int POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@nullConstraintCheck -FAILED: DataConstraintViolationError org.apache.hadoop.hive.ql.exec.errors.DataConstraintViolationError: Either CHECK or NOT NULL constraint violated! +FAILED: DataConstraintViolationError org.apache.hadoop.hive.ql.exec.errors.DataConstraintViolationError: NOT NULL constraint violated! diff --git ql/src/test/results/clientnegative/update_notnull_constraint.q.out ql/src/test/results/clientnegative/update_notnull_constraint.q.out index 86bfc67480..8748681dab 100644 --- ql/src/test/results/clientnegative/update_notnull_constraint.q.out +++ ql/src/test/results/clientnegative/update_notnull_constraint.q.out @@ -21,4 +21,4 @@ POSTHOOK: Output: default@acid_uami POSTHOOK: Lineage: acid_uami.de SCRIPT [] POSTHOOK: Lineage: acid_uami.i SCRIPT [] POSTHOOK: Lineage: acid_uami.vc SCRIPT [] -FAILED: DataConstraintViolationError org.apache.hadoop.hive.ql.exec.errors.DataConstraintViolationError: Either CHECK or NOT NULL constraint violated! +FAILED: DataConstraintViolationError org.apache.hadoop.hive.ql.exec.errors.DataConstraintViolationError: NOT NULL constraint violated! 
diff --git ql/src/test/results/clientpositive/druid/druidkafkamini_basic.q.out ql/src/test/results/clientpositive/druid/druidkafkamini_basic.q.out index 2e6d768d6b..c2cc24921c 100644 --- ql/src/test/results/clientpositive/druid/druidkafkamini_basic.q.out +++ ql/src/test/results/clientpositive/druid/druidkafkamini_basic.q.out @@ -71,7 +71,7 @@ kafkaPartitions=1 activeTasks=[] publishingTasks=[] latestOffsets={0=10} -minimumLag={0=0} +minimumLag={} aggregateLag=0 #### A masked pattern was here #### PREHOOK: query: Select count(*) FROM druid_kafka_test @@ -501,3 +501,7 @@ POSTHOOK: query: DROP TABLE druid_kafka_test POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@druid_kafka_test POSTHOOK: Output: default@druid_kafka_test +PREHOOK: query: DROP TABLE druid_table_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE druid_table_1 +POSTHOOK: type: DROPTABLE diff --git ql/src/test/results/clientpositive/infer_join_preds.q.out ql/src/test/results/clientpositive/infer_join_preds.q.out index a35faf31eb..6a4fa34653 100644 --- ql/src/test/results/clientpositive/infer_join_preds.q.out +++ ql/src/test/results/clientpositive/infer_join_preds.q.out @@ -607,597 +607,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### -PREHOOK: query: explain select * from src join src1 on src.key = src1.key and src.value = src1.value - where 4 between src.key and src.value -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from src join src1 on src.key = src1.key and src.value = src1.value - where 4 between src.key and src.value -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (4.0D BETWEEN UDFToDouble(key) AND UDFToDouble(value) and key is not null and value is not null) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (4.0D BETWEEN UDFToDouble(key) AND UDFToDouble(value) and key is not null and value is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: 
_col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: CREATE TABLE `table1`( - `idp_warehouse_id` bigint, - `idp_audit_id` bigint, - `idp_effective_date` date, - `idp_end_date` date, - `idp_delete_date` date, - `pruid` varchar(32), - `prid` bigint, - `prtimesheetid` bigint, - `prassignmentid` bigint, - `prchargecodeid` bigint, - `prtypecodeid` bigint, - `prsequence` bigint, - `prmodby` varchar(96), - `prmodtime` timestamp, - `prrmexported` bigint, - `prrmckdel` bigint, - `slice_status` int, - `role_id` bigint, - `user_lov1` varchar(30), - `user_lov2` varchar(30), - `incident_id` bigint, - `incident_investment_id` bigint, - `odf_ss_actuals` bigint, - `practsum` decimal(38,20)) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@table1 -POSTHOOK: query: CREATE TABLE `table1`( - `idp_warehouse_id` bigint, - `idp_audit_id` bigint, - `idp_effective_date` date, - `idp_end_date` date, - `idp_delete_date` date, - `pruid` varchar(32), - `prid` bigint, - `prtimesheetid` bigint, - `prassignmentid` bigint, - `prchargecodeid` bigint, - `prtypecodeid` bigint, - `prsequence` bigint, - `prmodby` varchar(96), - `prmodtime` timestamp, - `prrmexported` bigint, - `prrmckdel` bigint, - `slice_status` int, - `role_id` bigint, - `user_lov1` varchar(30), - `user_lov2` varchar(30), - `incident_id` bigint, - `incident_investment_id` bigint, - `odf_ss_actuals` bigint, - `practsum` decimal(38,20)) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@table1 -PREHOOK: query: CREATE TABLE `table2`( - `idp_warehouse_id` bigint, - `idp_audit_id` bigint, - `idp_effective_date` date, - `idp_end_date` date, - `idp_delete_date` date, - `pruid` varchar(32), - `prid` bigint, - `prtimesheetid` bigint, - `prassignmentid` bigint, - `prchargecodeid` bigint, - `prtypecodeid` bigint, - `prsequence` bigint, - `prmodby` varchar(96), - `prmodtime` timestamp, - `prrmexported` bigint, - `prrmckdel` bigint, - `slice_status` int, - `role_id` bigint, - `user_lov1` varchar(30), - `user_lov2` varchar(30), - `incident_id` bigint, - `incident_investment_id` bigint, - `odf_ss_actuals` bigint, - `practsum` decimal(38,20)) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@table2 -POSTHOOK: query: CREATE TABLE `table2`( - `idp_warehouse_id` bigint, - `idp_audit_id` bigint, - `idp_effective_date` date, - `idp_end_date` date, - `idp_delete_date` date, - `pruid` varchar(32), - `prid` bigint, - `prtimesheetid` bigint, - `prassignmentid` bigint, - `prchargecodeid` bigint, - `prtypecodeid` bigint, - `prsequence` bigint, - `prmodby` varchar(96), - `prmodtime` timestamp, - `prrmexported` bigint, - `prrmckdel` bigint, - `slice_status` int, - `role_id` bigint, - `user_lov1` varchar(30), - `user_lov2` varchar(30), - `incident_id` bigint, - `incident_investment_id` bigint, - `odf_ss_actuals` bigint, - `practsum` decimal(38,20)) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@table2 -PREHOOK: query: explain SELECT 
s.idp_warehouse_id AS source_warehouse_id - FROM table1 s - JOIN - - table2 d - ON ( - s.prid = d.prid ) - JOIN - table2 e - ON - s.prid = e.prid - WHERE - concat( - CASE - WHEN s.prid IS NULL THEN 1 - ELSE s.prid - END,',', - CASE - WHEN s.prtimesheetid IS NULL THEN 1 - ELSE s.prtimesheetid - END,',', - CASE - WHEN s.prassignmentid IS NULL THEN 1 - ELSE s.prassignmentid - END,',', - CASE - WHEN s.prchargecodeid IS NULL THEN 1 - ELSE s.prchargecodeid - END,',', - CASE - WHEN (s.prtypecodeid) IS NULL THEN '' - ELSE s.prtypecodeid - END,',', - CASE - WHEN s.practsum IS NULL THEN 1 - ELSE s.practsum - END,',', - CASE - WHEN s.prsequence IS NULL THEN 1 - ELSE s.prsequence - END,',', - CASE - WHEN length(s.prmodby) IS NULL THEN '' - ELSE s.prmodby - END,',', - CASE - WHEN s.prmodtime IS NULL THEN cast(from_unixtime(unix_timestamp('2017-12-08','yyyy-MM-dd') ) AS timestamp) - ELSE s.prmodtime - END,',', - CASE - WHEN s.prrmexported IS NULL THEN 1 - ELSE s.prrmexported - END,',', - CASE - WHEN s.prrmckdel IS NULL THEN 1 - ELSE s.prrmckdel - END,',', - CASE - WHEN s.slice_status IS NULL THEN 1 - ELSE s.slice_status - END,',', - CASE - WHEN s.role_id IS NULL THEN 1 - ELSE s.role_id - END,',', - CASE - WHEN length(s.user_lov1) IS NULL THEN '' - ELSE s.user_lov1 - END,',', - CASE - WHEN length(s.user_lov2) IS NULL THEN '' - ELSE s.user_lov2 - END,',', - CASE - WHEN s.incident_id IS NULL THEN 1 - ELSE s.incident_id - END,',', - CASE - WHEN s.incident_investment_id IS NULL THEN 1 - ELSE s.incident_investment_id - END,',', - CASE - WHEN s.odf_ss_actuals IS NULL THEN 1 - ELSE s.odf_ss_actuals - END ) != concat( - CASE - WHEN length(d.pruid) IS NULL THEN '' - ELSE d.pruid - END,',', - CASE - WHEN d.prid IS NULL THEN 1 - ELSE d.prid - END,',', - CASE - WHEN d.prtimesheetid IS NULL THEN 1 - ELSE d.prtimesheetid - END,',', - CASE - WHEN d.prassignmentid IS NULL THEN 1 - ELSE d.prassignmentid - END,',', - CASE - WHEN d.prchargecodeid IS NULL THEN 1 - ELSE d.prchargecodeid - END,',', - CASE - WHEN (d.prtypecodeid) IS NULL THEN '' - ELSE d.prtypecodeid - END,',', - CASE - WHEN d.practsum IS NULL THEN 1 - ELSE d.practsum - END,',', - CASE - WHEN d.prsequence IS NULL THEN 1 - ELSE d.prsequence - END,',', - CASE - WHEN length(d.prmodby) IS NULL THEN '' - ELSE d.prmodby - END,',', - CASE - WHEN d.prmodtime IS NULL THEN cast(from_unixtime(unix_timestamp('2017-12-08','yyyy-MM-dd') ) AS timestamp) - ELSE d.prmodtime - END,',', - CASE - WHEN d.prrmexported IS NULL THEN 1 - ELSE d.prrmexported - END,',', - CASE - WHEN d.prrmckdel IS NULL THEN 1 - ELSE d.prrmckdel - END,',', - CASE - WHEN d.slice_status IS NULL THEN 1 - ELSE d.slice_status - END,',', - CASE - WHEN d.role_id IS NULL THEN 1 - ELSE d.role_id - END,',', - CASE - WHEN length(d.user_lov1) IS NULL THEN '' - ELSE d.user_lov1 - END,',', - CASE - WHEN length(d.user_lov2) IS NULL THEN '' - ELSE d.user_lov2 - END,',', - CASE - WHEN d.incident_id IS NULL THEN 1 - ELSE d.incident_id - END,',', - CASE - WHEN d.incident_investment_id IS NULL THEN 1 - ELSE d.incident_investment_id - END,',', - CASE - WHEN d.odf_ss_actuals IS NULL THEN 1 - ELSE d.odf_ss_actuals - END ) -PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT s.idp_warehouse_id AS source_warehouse_id - FROM table1 s - JOIN - - table2 d - ON ( - s.prid = d.prid ) - JOIN - table2 e - ON - s.prid = e.prid - WHERE - concat( - CASE - WHEN s.prid IS NULL THEN 1 - ELSE s.prid - END,',', - CASE - WHEN s.prtimesheetid IS NULL THEN 1 - ELSE s.prtimesheetid - END,',', - CASE - WHEN s.prassignmentid IS NULL THEN 1 - ELSE 
s.prassignmentid - END,',', - CASE - WHEN s.prchargecodeid IS NULL THEN 1 - ELSE s.prchargecodeid - END,',', - CASE - WHEN (s.prtypecodeid) IS NULL THEN '' - ELSE s.prtypecodeid - END,',', - CASE - WHEN s.practsum IS NULL THEN 1 - ELSE s.practsum - END,',', - CASE - WHEN s.prsequence IS NULL THEN 1 - ELSE s.prsequence - END,',', - CASE - WHEN length(s.prmodby) IS NULL THEN '' - ELSE s.prmodby - END,',', - CASE - WHEN s.prmodtime IS NULL THEN cast(from_unixtime(unix_timestamp('2017-12-08','yyyy-MM-dd') ) AS timestamp) - ELSE s.prmodtime - END,',', - CASE - WHEN s.prrmexported IS NULL THEN 1 - ELSE s.prrmexported - END,',', - CASE - WHEN s.prrmckdel IS NULL THEN 1 - ELSE s.prrmckdel - END,',', - CASE - WHEN s.slice_status IS NULL THEN 1 - ELSE s.slice_status - END,',', - CASE - WHEN s.role_id IS NULL THEN 1 - ELSE s.role_id - END,',', - CASE - WHEN length(s.user_lov1) IS NULL THEN '' - ELSE s.user_lov1 - END,',', - CASE - WHEN length(s.user_lov2) IS NULL THEN '' - ELSE s.user_lov2 - END,',', - CASE - WHEN s.incident_id IS NULL THEN 1 - ELSE s.incident_id - END,',', - CASE - WHEN s.incident_investment_id IS NULL THEN 1 - ELSE s.incident_investment_id - END,',', - CASE - WHEN s.odf_ss_actuals IS NULL THEN 1 - ELSE s.odf_ss_actuals - END ) != concat( - CASE - WHEN length(d.pruid) IS NULL THEN '' - ELSE d.pruid - END,',', - CASE - WHEN d.prid IS NULL THEN 1 - ELSE d.prid - END,',', - CASE - WHEN d.prtimesheetid IS NULL THEN 1 - ELSE d.prtimesheetid - END,',', - CASE - WHEN d.prassignmentid IS NULL THEN 1 - ELSE d.prassignmentid - END,',', - CASE - WHEN d.prchargecodeid IS NULL THEN 1 - ELSE d.prchargecodeid - END,',', - CASE - WHEN (d.prtypecodeid) IS NULL THEN '' - ELSE d.prtypecodeid - END,',', - CASE - WHEN d.practsum IS NULL THEN 1 - ELSE d.practsum - END,',', - CASE - WHEN d.prsequence IS NULL THEN 1 - ELSE d.prsequence - END,',', - CASE - WHEN length(d.prmodby) IS NULL THEN '' - ELSE d.prmodby - END,',', - CASE - WHEN d.prmodtime IS NULL THEN cast(from_unixtime(unix_timestamp('2017-12-08','yyyy-MM-dd') ) AS timestamp) - ELSE d.prmodtime - END,',', - CASE - WHEN d.prrmexported IS NULL THEN 1 - ELSE d.prrmexported - END,',', - CASE - WHEN d.prrmckdel IS NULL THEN 1 - ELSE d.prrmckdel - END,',', - CASE - WHEN d.slice_status IS NULL THEN 1 - ELSE d.slice_status - END,',', - CASE - WHEN d.role_id IS NULL THEN 1 - ELSE d.role_id - END,',', - CASE - WHEN length(d.user_lov1) IS NULL THEN '' - ELSE d.user_lov1 - END,',', - CASE - WHEN length(d.user_lov2) IS NULL THEN '' - ELSE d.user_lov2 - END,',', - CASE - WHEN d.incident_id IS NULL THEN 1 - ELSE d.incident_id - END,',', - CASE - WHEN d.incident_investment_id IS NULL THEN 1 - ELSE d.incident_investment_id - END,',', - CASE - WHEN d.odf_ss_actuals IS NULL THEN 1 - ELSE d.odf_ss_actuals - END ) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: prid is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: prid (type: bigint) - sort order: + - Map-reduce partition columns: prid (type: bigint) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: idp_warehouse_id (type: bigint), prtimesheetid (type: bigint), prassignmentid (type: bigint), prchargecodeid (type: 
bigint), prtypecodeid (type: bigint), prsequence (type: bigint), prmodby (type: varchar(96)), prmodtime (type: timestamp), prrmexported (type: bigint), prrmckdel (type: bigint), slice_status (type: int), role_id (type: bigint), user_lov1 (type: varchar(30)), user_lov2 (type: varchar(30)), incident_id (type: bigint), incident_investment_id (type: bigint), odf_ss_actuals (type: bigint), practsum (type: decimal(38,20)) - TableScan - alias: d - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: prid is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: prid (type: bigint) - sort order: + - Map-reduce partition columns: prid (type: bigint) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: pruid (type: varchar(32)), prtimesheetid (type: bigint), prassignmentid (type: bigint), prchargecodeid (type: bigint), prtypecodeid (type: bigint), prsequence (type: bigint), prmodby (type: varchar(96)), prmodtime (type: timestamp), prrmexported (type: bigint), prrmckdel (type: bigint), slice_status (type: int), role_id (type: bigint), user_lov1 (type: varchar(30)), user_lov2 (type: varchar(30)), incident_id (type: bigint), incident_investment_id (type: bigint), odf_ss_actuals (type: bigint), practsum (type: decimal(38,20)) - TableScan - alias: e - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: prid is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: prid (type: bigint) - sort order: + - Map-reduce partition columns: prid (type: bigint) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 prid (type: bigint) - 1 prid (type: bigint) - 2 prid (type: bigint) - outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50 - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (concat(CASE WHEN (_col6 is null) THEN (1) ELSE (_col6) END, ',', CASE WHEN (_col7 is null) THEN (1) ELSE (_col7) END, ',', CASE WHEN (_col8 is null) THEN (1) ELSE (_col8) END, ',', CASE WHEN (_col9 is null) THEN (1) ELSE (_col9) END, ',', CASE WHEN (_col10 is null) THEN ('') ELSE (_col10) END, ',', CASE WHEN (_col23 is null) THEN (1) ELSE (_col23) END, ',', CASE WHEN (_col11 is null) THEN (1) ELSE (_col11) END, ',', CASE WHEN (length(_col12) is null) THEN ('') ELSE (_col12) END, ',', CASE WHEN (_col13 is null) THEN (TIMESTAMP'2017-12-08 00:00:00.0') ELSE (_col13) END, ',', CASE WHEN (_col14 is null) THEN (1) ELSE (_col14) END, ',', CASE WHEN (_col15 is null) THEN (1) ELSE (_col15) END, ',', CASE WHEN (_col16 is null) THEN (1) ELSE (_col16) END, ',', CASE WHEN (_col17 is null) THEN (1) ELSE (_col17) END, ',', CASE WHEN (length(_col18) is null) THEN ('') ELSE (_col18) END, ',', CASE WHEN (length(_col19) is null) THEN ('') ELSE (_col19) END, ',', CASE WHEN (_col20 is null) THEN (1) ELSE (_col20) END, ',', CASE WHEN (_col21 is null) THEN (1) ELSE 
(_col21) END, ',', CASE WHEN (_col22 is null) THEN (1) ELSE (_col22) END) <> concat(CASE WHEN (length(_col32) is null) THEN ('') ELSE (_col32) END, ',', CASE WHEN (_col33 is null) THEN (1) ELSE (_col33) END, ',', CASE WHEN (_col34 is null) THEN (1) ELSE (_col34) END, ',', CASE WHEN (_col35 is null) THEN (1) ELSE (_col35) END, ',', CASE WHEN (_col36 is null) THEN (1) ELSE (_col36) END, ',', CASE WHEN (_col37 is null) THEN ('') ELSE (_col37) END, ',', CASE WHEN (_col50 is null) THEN (1) ELSE (_col50) END, ',', CASE WHEN (_col38 is null) THEN (1) ELSE (_col38) END, ',', CASE WHEN (length(_col39) is null) THEN ('') ELSE (_col39) END, ',', CASE WHEN (_col40 is null) THEN (TIMESTAMP'2017-12-08 00:00:00.0') ELSE (_col40) END, ',', CASE WHEN (_col41 is null) THEN (1) ELSE (_col41) END, ',', CASE WHEN (_col42 is null) THEN (1) ELSE (_col42) END, ',', CASE WHEN (_col43 is null) THEN (1) ELSE (_col43) END, ',', CASE WHEN (_col44 is null) THEN (1) ELSE (_col44) END, ',', CASE WHEN (length(_col45) is null) THEN ('') ELSE (_col45) END, ',', CASE WHEN (length(_col46) is null) THEN ('') ELSE (_col46) END, ',', CASE WHEN (_col47 is null) THEN (1) ELSE (_col47) END, ',', CASE WHEN (_col48 is null) THEN (1) ELSE (_col48) END, ',', CASE WHEN (_col49 is null) THEN (1) ELSE (_col49) END)) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: drop table table2 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@table2 -PREHOOK: Output: default@table2 -POSTHOOK: query: drop table table2 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@table2 -POSTHOOK: Output: default@table2 -PREHOOK: query: drop table table1 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@table1 -PREHOOK: Output: default@table1 -POSTHOOK: query: drop table table1 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@table1 -POSTHOOK: Output: default@table1 diff --git ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out index 66b019bed4..116e00be01 100644 --- ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out +++ ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out @@ -235,25 +235,25 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) sort order: +++ - Statistics: Num rows: 414 Data size: 77004 
Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -823,15 +823,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) sort order: +++ - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 3 @@ -840,10 +840,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1436,7 +1436,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 207 Data size: 1656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -1565,7 +1565,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 4 - Statistics: Num rows: 207 Data size: 1656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -1696,7 +1696,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 708 Data size: 5664 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 662 Data size: 5296 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -1731,16 +1731,16 
@@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 414 Data size: 1656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 414 Data size: 1656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 414 Data size: 1656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -1838,11 +1838,11 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 4 - Statistics: Num rows: 414 Data size: 1656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 414 Data size: 1656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1851,7 +1851,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 0 Map 1 - Statistics: Num rows: 708 Data size: 5664 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 662 Data size: 5296 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -2007,7 +2007,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 708 Data size: 5664 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 662 Data size: 5296 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -2042,16 +2042,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 414 Data size: 1656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 414 Data size: 1656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 414 Data size: 1656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -2149,11 +2149,11 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 4 - Statistics: Num rows: 414 Data size: 1656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 414 Data size: 1656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2162,7 +2162,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 0 Map 1 - Statistics: Num rows: 708 Data 
size: 5664 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 662 Data size: 5296 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -2566,11 +2566,11 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2 - Statistics: Num rows: 414 Data size: 39330 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 38000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), substr(_col2, 5) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 414 Data size: 39330 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 38000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col0 (type: int) @@ -2708,11 +2708,11 @@ STAGE PLANS: outputColumnNames: _col1, _col2 input vertices: 0 Map 2 - Statistics: Num rows: 414 Data size: 39330 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 38000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), substr(_col2, 5) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 414 Data size: 39330 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 38000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col0 (type: int) @@ -2866,14 +2866,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 261 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 253 Data size: 26059 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: int), _col2 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 261 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 253 Data size: 26059 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 261 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 253 Data size: 26059 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2949,14 +2949,14 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3 input vertices: 1 Reducer 3 - Statistics: Num rows: 261 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 253 Data size: 26059 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: int), _col2 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 261 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 253 Data size: 26059 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 261 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 253 Data size: 26059 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3139,14 +3139,14 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) outputColumnNames: _col0, _col1, _col4 - Statistics: Num rows: 655 Data size: 
121830 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 633 Data size: 117738 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 655 Data size: 121830 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 633 Data size: 117738 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 655 Data size: 121830 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 633 Data size: 117738 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3202,14 +3202,14 @@ STAGE PLANS: input vertices: 1 Map 2 2 Map 3 - Statistics: Num rows: 655 Data size: 121830 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 633 Data size: 117738 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 655 Data size: 121830 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 633 Data size: 117738 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 655 Data size: 121830 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 633 Data size: 117738 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3304,12 +3304,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 414 Data size: 39330 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 38000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 414 Data size: 39330 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 38000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -3362,14 +3362,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 654 Data size: 121644 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 632 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 654 Data size: 121644 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 632 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 654 Data size: 121644 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 632 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3422,7 +3422,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 2 - Statistics: Num rows: 414 Data size: 39330 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 400 Data size: 38000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -3432,14 +3432,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 3 - Statistics: Num rows: 654 Data size: 121644 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 632 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 654 Data size: 121644 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 632 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 654 Data size: 121644 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 632 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3586,14 +3586,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 207 Data size: 21321 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 20600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 207 Data size: 21321 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 20600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 207 Data size: 21321 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 20600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3675,14 +3675,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Reducer 2 - Statistics: Num rows: 207 Data size: 21321 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 20600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 207 Data size: 21321 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 20600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 207 Data size: 21321 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 20600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3799,14 +3799,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 261 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 253 Data size: 26059 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 261 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE 
+ Statistics: Num rows: 253 Data size: 26059 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 261 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 253 Data size: 26059 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3882,14 +3882,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Reducer 2 - Statistics: Num rows: 261 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 253 Data size: 26059 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 261 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 253 Data size: 26059 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 261 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 253 Data size: 26059 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4178,14 +4178,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4258,14 +4258,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4374,14 +4374,14 @@ STAGE PLANS: outputColumnNames: _col0, _col2 input vertices: 1 Map 4 - Statistics: Num rows: 1385 Data size: 11080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1339 Data size: 
10712 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1385 Data size: 11080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1339 Data size: 10712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1385 Data size: 11080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1339 Data size: 10712 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4440,14 +4440,14 @@ STAGE PLANS: outputColumnNames: _col0, _col2 input vertices: 1 Map 3 - Statistics: Num rows: 1385 Data size: 11080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1339 Data size: 10712 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1385 Data size: 11080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1339 Data size: 10712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1385 Data size: 11080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1339 Data size: 10712 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4585,14 +4585,14 @@ STAGE PLANS: 0 _col0 (type: int), _col2 (type: string) 1 _col0 (type: int), _col2 (type: string) outputColumnNames: _col0, _col1, _col4 - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4689,14 +4689,14 @@ STAGE PLANS: 0 _col0 (type: int), _col2 (type: string) 1 _col0 (type: int), _col2 (type: string) outputColumnNames: _col0, _col1, _col4 - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 414 Data size: 77004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 400 Data size: 74400 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/mm_bhif.q.out ql/src/test/results/clientpositive/llap/mm_bhif.q.out deleted file mode 100644 index f6a7ed389f..0000000000 --- ql/src/test/results/clientpositive/llap/mm_bhif.q.out +++ /dev/null @@ -1,131 +0,0 @@ -PREHOOK: query: CREATE TABLE T1_mm(key STRING, val STRING) PARTITIONED BY (ds string) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE tblproperties ("transactional"="true", "transactional_properties"="insert_only") -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@T1_mm -POSTHOOK: query: CREATE TABLE T1_mm(key STRING, val STRING) PARTITIONED BY (ds string) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE tblproperties ("transactional"="true", "transactional_properties"="insert_only") -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@T1_mm -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_mm PARTITION (ds='1') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@t1_mm -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_mm PARTITION (ds='1') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@t1_mm -POSTHOOK: Output: default@t1_mm@ds=1 -PREHOOK: query: INSERT OVERWRITE TABLE T1_mm PARTITION (ds='1') select key, val from T1_mm where ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_mm -PREHOOK: Input: default@t1_mm@ds=1 -PREHOOK: Output: default@t1_mm@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE T1_mm PARTITION (ds='1') select key, val from T1_mm where ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_mm -POSTHOOK: Input: default@t1_mm@ds=1 -POSTHOOK: Output: default@t1_mm@ds=1 -POSTHOOK: Lineage: t1_mm PARTITION(ds=1).key SIMPLE [(t1_mm)t1_mm.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: t1_mm PARTITION(ds=1).val SIMPLE [(t1_mm)t1_mm.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: select * from T1_mm -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_mm -PREHOOK: Input: default@t1_mm@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: select * from T1_mm -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_mm -POSTHOOK: Input: default@t1_mm@ds=1 -#### A masked pattern was here #### -1 11 1 -2 12 1 -3 13 1 -7 17 1 -8 18 1 -8 28 1 -PREHOOK: query: explain -select count(distinct key) from T1_mm -PREHOOK: type: QUERY -POSTHOOK: query: explain -select count(distinct key) from T1_mm -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1_mm - Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string) - mode: final - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: 
count(_col0) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(distinct key) from T1_mm -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_mm -PREHOOK: Input: default@t1_mm@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: select count(distinct key) from T1_mm -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_mm -POSTHOOK: Input: default@t1_mm@ds=1 -#### A masked pattern was here #### -5 -PREHOOK: query: DROP TABLE T1_mm -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@t1_mm -PREHOOK: Output: default@t1_mm -POSTHOOK: query: DROP TABLE T1_mm -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@t1_mm -POSTHOOK: Output: default@t1_mm diff --git ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out index 68b68c4ba2..33ce299d7c 100644 --- ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out +++ ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out @@ -404,10 +404,10 @@ order by c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --8915 1 +-13036 1 -3799 1 10782 1 --13036 1 +-8915 1 NULL 6 PREHOOK: query: explain select diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index 79ba4c6532..c9dd434967 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -1,19 +1,15 @@ -PREHOOK: query: CREATE TABLE alltypesorc_string(cboolean1 boolean, ctimestamp1 timestamp, stimestamp1 string, - ctimestamp2 timestamp) STORED AS ORC +PREHOOK: query: CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: CREATE TABLE alltypesorc_string(cboolean1 boolean, ctimestamp1 timestamp, stimestamp1 string, - ctimestamp2 timestamp) STORED AS ORC +POSTHOOK: query: CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@alltypesorc_string PREHOOK: query: INSERT OVERWRITE TABLE alltypesorc_string SELECT - cboolean1, to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS toutc, - CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst, - 
ctimestamp2 + CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst FROM alltypesorc ORDER BY toutc, cst LIMIT 40 @@ -22,146 +18,16 @@ PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@alltypesorc_string POSTHOOK: query: INSERT OVERWRITE TABLE alltypesorc_string SELECT - cboolean1, to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS toutc, - CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst, - ctimestamp2 + CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst FROM alltypesorc ORDER BY toutc, cst LIMIT 40 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ] POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', '1978-08-05 14:41:05.501', '1999-10-03 16:59:10.396903939') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', '1978-08-05 14:41:05.501', '1999-10-03 16:59:10.396903939') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, null, '2013-04-10 00:43:46.8547315', null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, null, '2013-04-10 00:43:46.8547315', null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', null, null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', null, null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 
06:00:24.81200852', '0004-09-22 18:26:29.519542222', '1966-08-16 13:36:50.183') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', '0004-09-22 18:26:29.519542222', '1966-08-16 13:36:50.183') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, null, '4966-12-04 09:30:55.202', null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, null, '4966-12-04 09:30:55.202', null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', null, null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', null, null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '1985-07-20 09:30:11.0', '8521-01-16 20:42:05.668832', '1319-02-02 16:31:57.778') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '1985-07-20 09:30:11.0', '8521-01-16 20:42:05.668832', '1319-02-02 16:31:57.778') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, null, '1883-04-17 04:14:34.64776', '2024-11-11 16:42:41.101') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, null, '1883-04-17 04:14:34.64776', '2024-11-11 16:42:41.101') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [] -POSTHOOK: Lineage: 
alltypesorc_string.ctimestamp2 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '0528-10-27 08:15:18.941718273', null, null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '0528-10-27 08:15:18.941718273', null, null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values - (false, '2021-09-24 03:18:32.4', '1985-11-18 16:37:54.0', '2010-04-08 02:43:35.861742727'), - (true, null, '1985-11-18 16:37:54.0', null), - (null, '2021-09-24 03:18:32.4', null, '1974-10-04 17:21:03.989') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values - (false, '2021-09-24 03:18:32.4', '1985-11-18 16:37:54.0', '2010-04-08 02:43:35.861742727'), - (true, null, '1985-11-18 16:37:54.0', null), - (null, '2021-09-24 03:18:32.4', null, '1974-10-04 17:21:03.989') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] PREHOOK: query: CREATE TABLE alltypesorc_wrong(stimestamp1 string) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -206,15 +72,7 @@ PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1), - cboolean1, - ctimestamp1, - ctimestamp2, - if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), - if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), - if (cboolean1, ctimestamp1, ctimestamp2), - if (cboolean1, ctimestamp1, null), - if (cboolean1, null, ctimestamp2) + second(ctimestamp1) FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY @@ -227,15 +85,7 @@ POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1), - cboolean1, - ctimestamp1, - ctimestamp2, - if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), - if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), - if (cboolean1, ctimestamp1, ctimestamp2), - if (cboolean1, ctimestamp1, null), - if (cboolean1, null, ctimestamp2) + second(ctimestamp1) FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY @@ -259,18 +109,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 4356 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator - expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), 
weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int), cboolean1 (type: boolean), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), if(cboolean1, ctimestamp1, TIMESTAMP'1319-02-02 16:31:57.778') (type: timestamp), if(cboolean1, TIMESTAMP'2000-12-18 08:42:30.0005', ctimestamp1) (type: timestamp), if(cboolean1, ctimestamp1, ctimestamp2) (type: timestamp), if(cboolean1, ctimestamp1, null) (type: timestamp), if(cboolean1, null, ctimestamp2) (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 1, 3, 14, 15, 16, 17, 18] - selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 7:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 8:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 9:int, VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 10:int, VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 11:int, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 12:int, VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 13:int, IfExprTimestampColumnScalar(col 0:boolean, col 1:timestamp, val 1319-02-02 16:31:57.778) -> 14:timestamp, IfExprTimestampScalarColumn(col 0:boolean, val 2000-12-18 08:42:30.0005, col 1:timestamp) -> 15:timestamp, IfExprTimestampColumnColumn(col 0:boolean, col 1:timestampcol 3:timestamp) -> 16:timestamp, IfExprColumnNull(col 0:boolean, col 1:timestamp, null)(children: col 0:boolean, col 1:timestamp) -> 17:timestamp, IfExprNullColumn(col 0:boolean, null, col 3)(children: col 0:boolean, col 3:timestamp) -> 18:timestamp - Statistics: Num rows: 52 Data size: 16836 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11] + selectExpressions: VectorUDFUnixTimeStampTimestamp(col 0:timestamp) -> 3:bigint, VectorUDFYearTimestamp(col 0:timestamp, field YEAR) -> 4:int, VectorUDFMonthTimestamp(col 0:timestamp, field MONTH) -> 5:int, VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 6:int, VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 7:int, VectorUDFWeekOfYearTimestamp(col 0:timestamp, field WEEK_OF_YEAR) -> 8:int, VectorUDFHourTimestamp(col 0:timestamp, field HOUR_OF_DAY) -> 9:int, VectorUDFMinuteTimestamp(col 0:timestamp, field MINUTE) -> 10:int, VectorUDFSecondTimestamp(col 0:timestamp, field SECOND) -> 11:int + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -278,8 +128,8 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 52 Data size: 16836 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: boolean), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp), _col13 (type: timestamp), _col14 (type: timestamp), _col15 (type: timestamp), _col16 (type: timestamp) + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -301,19 +151,19 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int), VALUE._col8 (type: boolean), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp), VALUE._col12 (type: timestamp), VALUE._col13 (type: timestamp), VALUE._col14 (type: timestamp), VALUE._col15 (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] - Statistics: Num rows: 52 Data size: 16836 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 52 Data size: 16836 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -334,15 +184,7 @@ PREHOOK: query: SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1), - cboolean1, - ctimestamp1, - ctimestamp2, - if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), - if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), - if (cboolean1, ctimestamp1, ctimestamp2), - if (cboolean1, ctimestamp1, null), - if (cboolean1, null, ctimestamp2) + second(ctimestamp1) FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY @@ -357,72 +199,52 @@ POSTHOOK: query: SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1), - cboolean1, - ctimestamp1, - ctimestamp2, - if 
(cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), - if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), - if (cboolean1, ctimestamp1, ctimestamp2), - if (cboolean1, ctimestamp1, null), - if (cboolean1, null, ctimestamp2) + second(ctimestamp1) FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### --45479000681 528 10 27 27 43 8 15 18 true 0528-10-27 08:15:18.941718273 NULL 0528-10-27 08:15:18.941718273 2000-12-18 08:42:30.0005 0528-10-27 08:15:18.941718273 0528-10-27 08:15:18.941718273 NULL -1632478712 2021 9 24 24 38 3 18 32 NULL 2021-09-24 03:18:32.4 1974-10-04 17:21:03.989 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 1974-10-04 17:21:03.989 NULL 1974-10-04 17:21:03.989 -1632478712 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 1999-10-03 16:59:10.396903939 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 1999-10-03 16:59:10.396903939 NULL 1999-10-03 16:59:10.396903939 -1632478712 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 2010-04-08 02:43:35.861742727 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 2010-04-08 02:43:35.861742727 NULL 2010-04-08 02:43:35.861742727 -1632478712 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 NULL 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 NULL NULL NULL -163809612024 7160 12 2 2 48 6 0 24 NULL 7160-12-02 06:00:24.81200852 1966-08-16 13:36:50.183 1319-02-02 16:31:57.778 7160-12-02 06:00:24.81200852 1966-08-16 13:36:50.183 NULL 1966-08-16 13:36:50.183 -163809612024 7160 12 2 2 48 6 0 24 NULL 7160-12-02 06:00:24.81200852 NULL 1319-02-02 16:31:57.778 7160-12-02 06:00:24.81200852 NULL NULL NULL -490725011 1985 7 20 20 29 9 30 11 true 1985-07-20 09:30:11 1319-02-02 16:31:57.778 1985-07-20 09:30:11 2000-12-18 08:42:30.0005 1985-07-20 09:30:11 1985-07-20 09:30:11 NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1319-02-02 16:31:57.778 NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:44.028 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:44.028 NULL 1969-12-31 15:59:44.028 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:44.809 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:44.809 NULL 1969-12-31 15:59:44.809 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:50.531 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:50.531 NULL 1969-12-31 15:59:50.531 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:51.009 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:51.009 NULL 1969-12-31 15:59:51.009 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:53.761 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:53.761 NULL 1969-12-31 15:59:53.761 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:00.905 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:00.905 NULL 1969-12-31 16:00:00.905 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:03.586 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:03.586 NULL 1969-12-31 16:00:03.586 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:05.227 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:05.227 NULL 1969-12-31 16:00:05.227 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:05.535 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:05.535 NULL 1969-12-31 16:00:05.535 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:07.02 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:07.02 
NULL 1969-12-31 16:00:07.02 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:07.365 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:07.365 NULL 1969-12-31 16:00:07.365 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:07.517 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:07.517 NULL 1969-12-31 16:00:07.517 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:07.767 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:07.767 NULL 1969-12-31 16:00:07.767 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:08.602 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:08.602 NULL 1969-12-31 16:00:08.602 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:09.938 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:09.938 NULL 1969-12-31 16:00:09.938 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:14.214 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:14.214 NULL 1969-12-31 16:00:14.214 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:14.783 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:14.783 NULL 1969-12-31 16:00:14.783 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL NULL 1319-02-02 16:31:57.778 NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL NULL 1319-02-02 16:31:57.778 NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:43.773 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:44.262 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:44.568 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:47.351 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:47.446 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:48.023 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:48.629 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:49.177 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:49.208 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:50.789 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:51.245 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:52.372 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:55.249 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:00.661 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:00.784 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:01.836 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:09.313 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL 
true NULL 1969-12-31 16:00:09.538 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:09.986 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:11.031 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:11.465 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:13.589 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 2024-11-11 16:42:41.101 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL NULL NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), @@ -469,7 +291,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 1017 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -478,9 +300,9 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13] - selectExpressions: VectorUDFUnixTimeStampString(col 2:string) -> 5:bigint, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 7:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 8:int, 
VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 9:int, VectorUDFWeekOfYearString(col 2:string) -> 10:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 11:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 12:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 13:int - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11] + selectExpressions: VectorUDFUnixTimeStampString(col 1:string) -> 3:bigint, VectorUDFYearString(col 1:string, fieldStart 0, fieldLength 4) -> 4:int, VectorUDFMonthString(col 1:string, fieldStart 5, fieldLength 2) -> 5:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 6:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 7:int, VectorUDFWeekOfYearString(col 1:string) -> 8:int, VectorUDFHourString(col 1:string, fieldStart 11, fieldLength 2) -> 9:int, VectorUDFMinuteString(col 1:string, fieldStart 14, fieldLength 2) -> 10:int, VectorUDFSecondString(col 1:string, fieldStart 17, fieldLength 2) -> 11:int + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -488,7 +310,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -517,13 +339,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -565,18 +387,6 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### --2736243926 1883 4 17 17 16 4 14 34 --62018170411 4 9 22 22 39 18 26 29 -1365579826 2013 4 10 10 15 0 43 46 -206731024925 8521 1 16 16 3 20 42 5 -271201265 1978 8 5 5 31 14 41 5 -501208674 1985 11 18 18 47 16 37 54 -501208674 1985 11 18 18 47 16 37 54 -94573848655 4966 12 4 4 49 9 30 55 -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 
NULL @@ -663,7 +473,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 3097 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 1684 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -672,9 +482,9 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [7, 8, 9, 10, 11, 12, 13, 14, 15] - selectExpressions: LongColEqualLongColumn(col 5:bigint, col 6:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFUnixTimeStampString(col 2:string) -> 6:bigint) -> 7:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 5:int, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int) -> 8:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 5:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 6:int) -> 9:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 10:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 11:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 5:int, VectorUDFWeekOfYearString(col 2:string) -> 6:int) -> 12:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 5:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 6:int) -> 13:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 5:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 6:int) -> 14:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 5:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 6:int) -> 15:boolean - Statistics: Num rows: 52 Data size: 1872 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13] + selectExpressions: LongColEqualLongColumn(col 3:bigint, col 4:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 0:timestamp) -> 3:bigint, VectorUDFUnixTimeStampString(col 1:string) -> 4:bigint) -> 5:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFYearTimestamp(col 0:timestamp, field YEAR) -> 3:int, VectorUDFYearString(col 1:string, fieldStart 0, fieldLength 4) -> 4:int) -> 6:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFMonthTimestamp(col 0:timestamp, field MONTH) -> 3:int, VectorUDFMonthString(col 1:string, fieldStart 5, fieldLength 2) -> 4:int) -> 7:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 4:int) -> 8:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthString(col 
1:string, fieldStart 8, fieldLength 2) -> 4:int) -> 9:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFWeekOfYearTimestamp(col 0:timestamp, field WEEK_OF_YEAR) -> 3:int, VectorUDFWeekOfYearString(col 1:string) -> 4:int) -> 10:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFHourTimestamp(col 0:timestamp, field HOUR_OF_DAY) -> 3:int, VectorUDFHourString(col 1:string, fieldStart 11, fieldLength 2) -> 4:int) -> 11:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFMinuteTimestamp(col 0:timestamp, field MINUTE) -> 3:int, VectorUDFMinuteString(col 1:string, fieldStart 14, fieldLength 2) -> 4:int) -> 12:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFSecondTimestamp(col 0:timestamp, field SECOND) -> 3:int, VectorUDFSecondString(col 1:string, fieldStart 17, fieldLength 2) -> 4:int) -> 13:boolean + Statistics: Num rows: 40 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -682,7 +492,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 52 Data size: 1872 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs @@ -711,13 +521,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 52 Data size: 1872 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 52 Data size: 1872 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -799,18 +609,6 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -false false false false false false false false false -false false false false false false false false false -false false false false false false false false false -false false false false false false false false false PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), @@ -990,7 +788,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: 
alltypesorc_string - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -999,12 +797,12 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1] - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [0] + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() Group By Vectorization: - aggregators: VectorUDAFMinTimestamp(col 1:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 1:timestamp) -> timestamp, VectorUDAFCount(col 1:timestamp) -> bigint, VectorUDAFCountStar(*) -> bigint + aggregators: VectorUDAFMinTimestamp(col 0:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 0:timestamp) -> timestamp, VectorUDAFCount(col 0:timestamp) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false @@ -1088,7 +886,7 @@ FROM alltypesorc_string POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -0528-10-27 08:15:18.941718273 7160-12-02 06:00:24.81200852 8 52 +NULL NULL 0 40 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string @@ -1117,7 +915,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -1126,12 +924,12 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1] - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [0] + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(ctimestamp1) Group By Vectorization: - aggregators: VectorUDAFSumTimestamp(col 1:timestamp) -> double + aggregators: VectorUDAFSumTimestamp(col 0:timestamp) -> double className: VectorGroupByOperator groupByMode: HASH native: false @@ -1218,7 +1016,7 @@ FROM alltypesorc_string POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -2.89160863229166E11 +NULL PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -1261,7 +1059,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -1270,13 +1068,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 5, 8] - selectExpressions: CastTimestampToDouble(col 1:timestamp) -> 5:double, DoubleColMultiplyDoubleColumn(col 6:double, col 7:double)(children: CastTimestampToDouble(col 1:timestamp) -> 6:double, CastTimestampToDouble(col 1:timestamp) -> 7:double) -> 8:double - Statistics: Num rows: 52 Data size: 2080 Basic stats: 
COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [0, 3, 6] + selectExpressions: CastTimestampToDouble(col 0:timestamp) -> 3:double, DoubleColMultiplyDoubleColumn(col 4:double, col 5:double)(children: CastTimestampToDouble(col 0:timestamp) -> 4:double, CastTimestampToDouble(col 0:timestamp) -> 5:double) -> 6:double + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumTimestamp(col 1:timestamp) -> double, VectorUDAFCount(col 1:timestamp) -> bigint, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFSumDouble(col 5:double) -> double + aggregators: VectorUDAFSumTimestamp(col 0:timestamp) -> double, VectorUDAFCount(col 0:timestamp) -> bigint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 3:double) -> double className: VectorGroupByOperator groupByMode: HASH native: false @@ -1377,4 +1175,4 @@ FROM alltypesorc_string POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -3.6145107904E10 false false false 7.5245155692476E10 7.5245155692476E10 7.5245155692476E10 8.0440455033059E10 +NULL NULL NULL NULL NULL NULL NULL NULL diff --git ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out index fe5fd23b9d..68b89a726e 100644 --- ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out @@ -1,19 +1,15 @@ -PREHOOK: query: CREATE TABLE alltypesorc_string(cboolean1 boolean, ctimestamp1 timestamp, stimestamp1 string, - ctimestamp2 timestamp) STORED AS ORC +PREHOOK: query: CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: CREATE TABLE alltypesorc_string(cboolean1 boolean, ctimestamp1 timestamp, stimestamp1 string, - ctimestamp2 timestamp) STORED AS ORC +POSTHOOK: query: CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@alltypesorc_string PREHOOK: query: INSERT OVERWRITE TABLE alltypesorc_string SELECT - cboolean1, to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS toutc, - CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst, - ctimestamp2 + CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst FROM alltypesorc ORDER BY toutc, cst LIMIT 40 @@ -22,146 +18,16 @@ PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@alltypesorc_string POSTHOOK: query: INSERT OVERWRITE TABLE alltypesorc_string SELECT - cboolean1, to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS toutc, - CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst, - ctimestamp2 + CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst FROM alltypesorc ORDER BY toutc, cst LIMIT 40 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ] POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION 
[(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', '1978-08-05 14:41:05.501', '1999-10-03 16:59:10.396903939') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', '1978-08-05 14:41:05.501', '1999-10-03 16:59:10.396903939') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, null, '2013-04-10 00:43:46.8547315', null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, null, '2013-04-10 00:43:46.8547315', null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', null, null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', null, null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', '0004-09-22 18:26:29.519542222', '1966-08-16 13:36:50.183') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', '0004-09-22 18:26:29.519542222', '1966-08-16 13:36:50.183') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, null, '4966-12-04 09:30:55.202', null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: 
default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, null, '4966-12-04 09:30:55.202', null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', null, null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', null, null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '1985-07-20 09:30:11.0', '8521-01-16 20:42:05.668832', '1319-02-02 16:31:57.778') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '1985-07-20 09:30:11.0', '8521-01-16 20:42:05.668832', '1319-02-02 16:31:57.778') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, null, '1883-04-17 04:14:34.64776', '2024-11-11 16:42:41.101') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, null, '1883-04-17 04:14:34.64776', '2024-11-11 16:42:41.101') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '0528-10-27 08:15:18.941718273', null, null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '0528-10-27 08:15:18.941718273', null, null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values - (false, '2021-09-24 03:18:32.4', '1985-11-18 16:37:54.0', 
'2010-04-08 02:43:35.861742727'), - (true, null, '1985-11-18 16:37:54.0', null), - (null, '2021-09-24 03:18:32.4', null, '1974-10-04 17:21:03.989') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values - (false, '2021-09-24 03:18:32.4', '1985-11-18 16:37:54.0', '2010-04-08 02:43:35.861742727'), - (true, null, '1985-11-18 16:37:54.0', null), - (null, '2021-09-24 03:18:32.4', null, '1974-10-04 17:21:03.989') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] PREHOOK: query: CREATE TABLE alltypesorc_wrong(stimestamp1 string) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -206,15 +72,7 @@ PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1), - cboolean1, - ctimestamp1, - ctimestamp2, - if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), - if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), - if (cboolean1, ctimestamp1, ctimestamp2), - if (cboolean1, ctimestamp1, null), - if (cboolean1, null, ctimestamp2) + second(ctimestamp1) FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY @@ -227,15 +85,7 @@ POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1), - cboolean1, - ctimestamp1, - ctimestamp2, - if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), - if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), - if (cboolean1, ctimestamp1, ctimestamp2), - if (cboolean1, ctimestamp1, null), - if (cboolean1, null, ctimestamp2) + second(ctimestamp1) FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY @@ -258,18 +108,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator - expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int), cboolean1 (type: boolean), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), if(cboolean1, ctimestamp1, TIMESTAMP'1319-02-02 16:31:57.778') (type: timestamp), if(cboolean1, TIMESTAMP'2000-12-18 08:42:30.0005', ctimestamp1) (type: timestamp), if(cboolean1, ctimestamp1, ctimestamp2) (type: timestamp), if(cboolean1, ctimestamp1, null) (type: timestamp), if(cboolean1, null, ctimestamp2) (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), 
hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 1, 3, 14, 15, 16, 17, 18] - selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 7:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 8:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 9:int, VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 10:int, VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 11:int, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 12:int, VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 13:int, IfExprTimestampColumnScalar(col 0:boolean, col 1:timestamp, val 1319-02-02 16:31:57.778) -> 14:timestamp, IfExprTimestampScalarColumn(col 0:boolean, val 2000-12-18 08:42:30.0005, col 1:timestamp) -> 15:timestamp, IfExprTimestampColumnColumn(col 0:boolean, col 1:timestampcol 3:timestamp) -> 16:timestamp, IfExprColumnNull(col 0:boolean, col 1:timestamp, null)(children: col 0:boolean, col 1:timestamp) -> 17:timestamp, IfExprNullColumn(col 0:boolean, null, col 3)(children: col 0:boolean, col 3:timestamp) -> 18:timestamp - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11] + selectExpressions: VectorUDFUnixTimeStampTimestamp(col 0:timestamp) -> 3:bigint, VectorUDFYearTimestamp(col 0:timestamp, field YEAR) -> 4:int, VectorUDFMonthTimestamp(col 0:timestamp, field MONTH) -> 5:int, VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 6:int, VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 7:int, VectorUDFWeekOfYearTimestamp(col 0:timestamp, field WEEK_OF_YEAR) -> 8:int, VectorUDFHourTimestamp(col 0:timestamp, field HOUR_OF_DAY) -> 9:int, VectorUDFMinuteTimestamp(col 0:timestamp, field MINUTE) -> 10:int, VectorUDFSecondTimestamp(col 0:timestamp, field SECOND) -> 11:int + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -277,8 +127,8 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: boolean), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp), _col13 (type: timestamp), _col14 (type: timestamp), _col15 (type: timestamp), _col16 (type: timestamp) + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized Map Vectorization: 
enabled: true @@ -299,19 +149,19 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int), VALUE._col8 (type: boolean), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp), VALUE._col12 (type: timestamp), VALUE._col13 (type: timestamp), VALUE._col14 (type: timestamp), VALUE._col15 (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -332,15 +182,7 @@ PREHOOK: query: SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1), - cboolean1, - ctimestamp1, - ctimestamp2, - if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), - if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), - if (cboolean1, ctimestamp1, ctimestamp2), - if (cboolean1, ctimestamp1, null), - if (cboolean1, null, ctimestamp2) + second(ctimestamp1) FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY @@ -355,72 +197,52 @@ POSTHOOK: query: SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1), - cboolean1, - ctimestamp1, - ctimestamp2, - if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), - if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), - if (cboolean1, ctimestamp1, ctimestamp2), - if (cboolean1, ctimestamp1, null), - if (cboolean1, null, ctimestamp2) + second(ctimestamp1) FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### --45479000681 528 10 27 27 43 8 15 18 true 0528-10-27 08:15:18.941718273 NULL 0528-10-27 08:15:18.941718273 2000-12-18 08:42:30.0005 0528-10-27 08:15:18.941718273 0528-10-27 08:15:18.941718273 NULL -1632478712 2021 9 24 24 38 3 18 32 NULL 2021-09-24 03:18:32.4 1974-10-04 17:21:03.989 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 1974-10-04 17:21:03.989 NULL 1974-10-04 17:21:03.989 -1632478712 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 1999-10-03 16:59:10.396903939 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 1999-10-03 16:59:10.396903939 NULL 1999-10-03 
16:59:10.396903939 -1632478712 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 2010-04-08 02:43:35.861742727 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 2010-04-08 02:43:35.861742727 NULL 2010-04-08 02:43:35.861742727 -1632478712 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 NULL 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 NULL NULL NULL -163809612024 7160 12 2 2 48 6 0 24 NULL 7160-12-02 06:00:24.81200852 1966-08-16 13:36:50.183 1319-02-02 16:31:57.778 7160-12-02 06:00:24.81200852 1966-08-16 13:36:50.183 NULL 1966-08-16 13:36:50.183 -163809612024 7160 12 2 2 48 6 0 24 NULL 7160-12-02 06:00:24.81200852 NULL 1319-02-02 16:31:57.778 7160-12-02 06:00:24.81200852 NULL NULL NULL -490725011 1985 7 20 20 29 9 30 11 true 1985-07-20 09:30:11 1319-02-02 16:31:57.778 1985-07-20 09:30:11 2000-12-18 08:42:30.0005 1985-07-20 09:30:11 1985-07-20 09:30:11 NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1319-02-02 16:31:57.778 NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:47.183 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:47.183 NULL 1969-12-31 15:59:47.183 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:52.843 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:52.843 NULL 1969-12-31 15:59:52.843 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:53.087 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:53.087 NULL 1969-12-31 15:59:53.087 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:53.55 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:53.55 NULL 1969-12-31 15:59:53.55 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:54.042 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:54.042 NULL 1969-12-31 15:59:54.042 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:54.686 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:54.686 NULL 1969-12-31 15:59:54.686 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:58.459 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:58.459 NULL 1969-12-31 15:59:58.459 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:00.889 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:00.889 NULL 1969-12-31 16:00:00.889 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:01.258 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:01.258 NULL 1969-12-31 16:00:01.258 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:05.698 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:05.698 NULL 1969-12-31 16:00:05.698 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:08.602 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:08.602 NULL 1969-12-31 16:00:08.602 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:14.214 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:14.214 NULL 1969-12-31 16:00:14.214 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:15.466 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:15.466 NULL 1969-12-31 16:00:15.466 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL NULL 1319-02-02 16:31:57.778 NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:46.123 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:49.989 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:51.119 NULL 2000-12-18 
08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:52.961 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:52.967 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:53.593 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:53.641 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:55.407 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:55.439 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:56.031 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:57.719 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:58.636 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:00.176 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:00.423 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:00.477 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:00.93 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:01.839 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:02.13 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:03.151 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:03.756 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:06.134 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:07.209 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:10.361 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:11.525 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:13.589 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:13.839 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:15.601 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 2024-11-11 16:42:41.101 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL NULL NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL 
NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), @@ -466,7 +288,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -475,9 +297,9 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13] - selectExpressions: VectorUDFUnixTimeStampString(col 2:string) -> 5:bigint, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 7:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 8:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 9:int, VectorUDFWeekOfYearString(col 2:string) -> 10:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 11:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 12:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 13:int - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11] + selectExpressions: VectorUDFUnixTimeStampString(col 1:string) -> 3:bigint, VectorUDFYearString(col 1:string, fieldStart 0, fieldLength 4) -> 4:int, VectorUDFMonthString(col 1:string, fieldStart 5, fieldLength 2) -> 5:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 6:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 7:int, VectorUDFWeekOfYearString(col 1:string) -> 8:int, VectorUDFHourString(col 1:string, fieldStart 11, fieldLength 2) -> 9:int, VectorUDFMinuteString(col 1:string, fieldStart 14, fieldLength 2) -> 10:int, VectorUDFSecondString(col 1:string, 
fieldStart 17, fieldLength 2) -> 11:int + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -485,7 +307,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized Map Vectorization: @@ -513,13 +335,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -561,18 +383,6 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### --2736243926 1883 4 17 17 16 4 14 34 --62018170411 4 9 22 22 39 18 26 29 -1365579826 2013 4 10 10 15 0 43 46 -206731024925 8521 1 16 16 3 20 42 5 -271201265 1978 8 5 5 31 14 41 5 -501208674 1985 11 18 18 47 16 37 54 -501208674 1985 11 18 18 47 16 37 54 -94573848655 4966 12 4 4 49 9 30 55 -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL @@ -658,7 +468,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -667,9 +477,9 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [7, 8, 9, 10, 11, 12, 13, 14, 15] - selectExpressions: LongColEqualLongColumn(col 5:bigint, col 6:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFUnixTimeStampString(col 2:string) -> 6:bigint) -> 7:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 5:int, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int) -> 8:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 5:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 6:int) -> 9:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: 
VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 10:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 11:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 5:int, VectorUDFWeekOfYearString(col 2:string) -> 6:int) -> 12:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 5:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 6:int) -> 13:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 5:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 6:int) -> 14:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 5:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 6:int) -> 15:boolean - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13] + selectExpressions: LongColEqualLongColumn(col 3:bigint, col 4:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 0:timestamp) -> 3:bigint, VectorUDFUnixTimeStampString(col 1:string) -> 4:bigint) -> 5:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFYearTimestamp(col 0:timestamp, field YEAR) -> 3:int, VectorUDFYearString(col 1:string, fieldStart 0, fieldLength 4) -> 4:int) -> 6:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFMonthTimestamp(col 0:timestamp, field MONTH) -> 3:int, VectorUDFMonthString(col 1:string, fieldStart 5, fieldLength 2) -> 4:int) -> 7:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 4:int) -> 8:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 4:int) -> 9:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFWeekOfYearTimestamp(col 0:timestamp, field WEEK_OF_YEAR) -> 3:int, VectorUDFWeekOfYearString(col 1:string) -> 4:int) -> 10:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFHourTimestamp(col 0:timestamp, field HOUR_OF_DAY) -> 3:int, VectorUDFHourString(col 1:string, fieldStart 11, fieldLength 2) -> 4:int) -> 11:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFMinuteTimestamp(col 0:timestamp, field MINUTE) -> 3:int, VectorUDFMinuteString(col 1:string, fieldStart 14, fieldLength 2) -> 4:int) -> 12:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFSecondTimestamp(col 0:timestamp, field SECOND) -> 3:int, VectorUDFSecondString(col 1:string, fieldStart 17, fieldLength 2) -> 4:int) -> 13:boolean + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -677,7 +487,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized Map Vectorization: @@ -705,13 +515,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -793,18 +603,6 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -false false false false false false false false false -false false false false false false false false false -false false false false false false false false false -false false false false false false false false false PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), @@ -981,7 +779,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -990,12 +788,12 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1] - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0] + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() Group By Vectorization: - aggregators: VectorUDAFMinTimestamp(col 1:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 1:timestamp) -> timestamp, VectorUDAFCount(col 1:timestamp) -> bigint, VectorUDAFCountStar(*) -> bigint + aggregators: VectorUDAFMinTimestamp(col 0:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 0:timestamp) -> timestamp, VectorUDAFCount(col 0:timestamp) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false @@ -1078,7 +876,7 @@ FROM alltypesorc_string POSTHOOK: 
type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -0528-10-27 08:15:18.941718273 7160-12-02 06:00:24.81200852 8 52 +NULL NULL 0 40 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string @@ -1106,7 +904,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -1115,12 +913,12 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1] - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0] + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ctimestamp1) Group By Vectorization: - aggregators: VectorUDAFSumTimestamp(col 1:timestamp) -> double + aggregators: VectorUDAFSumTimestamp(col 0:timestamp) -> double className: VectorGroupByOperator groupByMode: HASH native: false @@ -1206,7 +1004,7 @@ FROM alltypesorc_string POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -2.89160863229166E11 +NULL PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -1248,7 +1046,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -1257,13 +1055,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 5, 8] - selectExpressions: CastTimestampToDouble(col 1:timestamp) -> 5:double, DoubleColMultiplyDoubleColumn(col 6:double, col 7:double)(children: CastTimestampToDouble(col 1:timestamp) -> 6:double, CastTimestampToDouble(col 1:timestamp) -> 7:double) -> 8:double - Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0, 3, 6] + selectExpressions: CastTimestampToDouble(col 0:timestamp) -> 3:double, DoubleColMultiplyDoubleColumn(col 4:double, col 5:double)(children: CastTimestampToDouble(col 0:timestamp) -> 4:double, CastTimestampToDouble(col 0:timestamp) -> 5:double) -> 6:double + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumTimestamp(col 1:timestamp) -> double, VectorUDAFCount(col 1:timestamp) -> bigint, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFSumDouble(col 5:double) -> double + aggregators: VectorUDAFSumTimestamp(col 0:timestamp) -> double, VectorUDAFCount(col 0:timestamp) -> bigint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 3:double) -> double className: VectorGroupByOperator groupByMode: HASH native: false @@ -1363,4 +1161,4 @@ FROM alltypesorc_string POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -3.6145107904E10 false false false 7.5245155692476E10 7.5245155692476E10 7.5245155692476E10 8.0440455033059E10 +NULL NULL NULL NULL NULL 
NULL NULL NULL diff --git ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out index 8add8c0c45..e49321169e 100644 --- ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out +++ ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out @@ -34,19 +34,19 @@ Stage-2 default@src_stats,src_stats,Tbl:COMPLETE,Col:COMPLETE PREHOOK: query: analyze table src_stats compute statistics for columns -PREHOOK: type: ANALYZE_TABLE +PREHOOK: type: QUERY PREHOOK: Input: default@src_stats PREHOOK: Output: default@src_stats PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: analyze table src_stats compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: type: QUERY POSTHOOK: Input: default@src_stats POSTHOOK: Output: default@src_stats POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: explain analyze analyze table src_stats compute statistics for columns -PREHOOK: type: ANALYZE_TABLE +PREHOOK: type: QUERY POSTHOOK: query: explain analyze analyze table src_stats compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: type: QUERY Vertex dependency in root stage Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) @@ -417,7 +417,7 @@ Stage-3 Output:["_col0"] Filter Operator [FIL_8] (rows=1/8 width=3) predicate:(cint < -1070551679) - TableScan [TS_0] (rows=5865/12288 width=3) + TableScan [TS_0] (rows=7484/12288 width=3) default@acid_dot,acid_dot, ACID table,Tbl:COMPLETE,Col:NONE,Output:["cint"] PREHOOK: query: select count(*) from acid_dot diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out index 01e915b8f1..244aca6d00 100644 --- ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out @@ -1,19 +1,15 @@ -PREHOOK: query: CREATE TABLE alltypesorc_string(cboolean1 boolean, ctimestamp1 timestamp, stimestamp1 string, - ctimestamp2 timestamp) STORED AS ORC +PREHOOK: query: CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: CREATE TABLE alltypesorc_string(cboolean1 boolean, ctimestamp1 timestamp, stimestamp1 string, - ctimestamp2 timestamp) STORED AS ORC +POSTHOOK: query: CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@alltypesorc_string PREHOOK: query: INSERT OVERWRITE TABLE alltypesorc_string SELECT - cboolean1, to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS toutc, - CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst, - ctimestamp2 + CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst FROM alltypesorc ORDER BY toutc, cst LIMIT 40 @@ -22,146 +18,16 @@ PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@alltypesorc_string POSTHOOK: query: INSERT OVERWRITE TABLE alltypesorc_string SELECT - cboolean1, to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS toutc, - CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst, - ctimestamp2 + CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst FROM alltypesorc ORDER BY toutc, cst LIMIT 40 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@alltypesorc_string 
-POSTHOOK: Lineage: alltypesorc_string.cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ] POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', '1978-08-05 14:41:05.501', '1999-10-03 16:59:10.396903939') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', '1978-08-05 14:41:05.501', '1999-10-03 16:59:10.396903939') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, null, '2013-04-10 00:43:46.8547315', null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, null, '2013-04-10 00:43:46.8547315', null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', null, null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', null, null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', '0004-09-22 18:26:29.519542222', '1966-08-16 13:36:50.183') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', '0004-09-22 18:26:29.519542222', '1966-08-16 13:36:50.183') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 
SCRIPT [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, null, '4966-12-04 09:30:55.202', null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, null, '4966-12-04 09:30:55.202', null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', null, null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', null, null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '1985-07-20 09:30:11.0', '8521-01-16 20:42:05.668832', '1319-02-02 16:31:57.778') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '1985-07-20 09:30:11.0', '8521-01-16 20:42:05.668832', '1319-02-02 16:31:57.778') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, null, '1883-04-17 04:14:34.64776', '2024-11-11 16:42:41.101') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, null, '1883-04-17 04:14:34.64776', '2024-11-11 16:42:41.101') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '0528-10-27 08:15:18.941718273', null, null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '0528-10-27 08:15:18.941718273', null, null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 
EXPRESSION [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [] -PREHOOK: query: INSERT INTO TABLE alltypesorc_string values - (false, '2021-09-24 03:18:32.4', '1985-11-18 16:37:54.0', '2010-04-08 02:43:35.861742727'), - (true, null, '1985-11-18 16:37:54.0', null), - (null, '2021-09-24 03:18:32.4', null, '1974-10-04 17:21:03.989') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values - (false, '2021-09-24 03:18:32.4', '1985-11-18 16:37:54.0', '2010-04-08 02:43:35.861742727'), - (true, null, '1985-11-18 16:37:54.0', null), - (null, '2021-09-24 03:18:32.4', null, '1974-10-04 17:21:03.989') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@alltypesorc_string -POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] -POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] PREHOOK: query: CREATE TABLE alltypesorc_wrong(stimestamp1 string) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -206,15 +72,7 @@ PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1), - cboolean1, - ctimestamp1, - ctimestamp2, - if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), - if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), - if (cboolean1, ctimestamp1, ctimestamp2), - if (cboolean1, ctimestamp1, null), - if (cboolean1, null, ctimestamp2) + second(ctimestamp1) FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY @@ -227,15 +85,7 @@ POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1), - cboolean1, - ctimestamp1, - ctimestamp2, - if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), - if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), - if (cboolean1, ctimestamp1, ctimestamp2), - if (cboolean1, ctimestamp1, null), - if (cboolean1, null, ctimestamp2) + second(ctimestamp1) FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY @@ -253,18 +103,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator - expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int), cboolean1 (type: boolean), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), if(cboolean1, ctimestamp1, TIMESTAMP'1319-02-02 16:31:57.778') (type: timestamp), if(cboolean1, TIMESTAMP'2000-12-18 08:42:30.0005', ctimestamp1) (type: timestamp), if(cboolean1, ctimestamp1, ctimestamp2) (type: timestamp), if(cboolean1, ctimestamp1, null) (type: timestamp), if(cboolean1, null, ctimestamp2) (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + expressions: 
to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 1, 3, 14, 15, 16, 17, 18] - selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 7:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 8:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 9:int, VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 10:int, VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 11:int, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 12:int, VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 13:int, IfExprTimestampColumnScalar(col 0:boolean, col 1:timestamp, val 1319-02-02 16:31:57.778) -> 14:timestamp, IfExprTimestampScalarColumn(col 0:boolean, val 2000-12-18 08:42:30.0005, col 1:timestamp) -> 15:timestamp, IfExprTimestampColumnColumn(col 0:boolean, col 1:timestampcol 3:timestamp) -> 16:timestamp, IfExprColumnNull(col 0:boolean, col 1:timestamp, null)(children: col 0:boolean, col 1:timestamp) -> 17:timestamp, IfExprNullColumn(col 0:boolean, null, col 3)(children: col 0:boolean, col 3:timestamp) -> 18:timestamp - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11] + selectExpressions: VectorUDFUnixTimeStampTimestamp(col 0:timestamp) -> 3:bigint, VectorUDFYearTimestamp(col 0:timestamp, field YEAR) -> 4:int, VectorUDFMonthTimestamp(col 0:timestamp, field MONTH) -> 5:int, VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 6:int, VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 7:int, VectorUDFWeekOfYearTimestamp(col 0:timestamp, field WEEK_OF_YEAR) -> 8:int, VectorUDFHourTimestamp(col 0:timestamp, field HOUR_OF_DAY) -> 9:int, VectorUDFMinuteTimestamp(col 0:timestamp, field MINUTE) -> 10:int, VectorUDFSecondTimestamp(col 0:timestamp, field SECOND) -> 11:int + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -273,8 +123,8 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: boolean), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp), _col13 (type: timestamp), _col14 (type: timestamp), _col15 (type: timestamp), _col16 (type: timestamp) + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: 
int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized Map Vectorization: enabled: true @@ -291,12 +141,12 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int), VALUE._col8 (type: boolean), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp), VALUE._col12 (type: timestamp), VALUE._col13 (type: timestamp), VALUE._col14 (type: timestamp), VALUE._col15 (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -317,15 +167,7 @@ PREHOOK: query: SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1), - cboolean1, - ctimestamp1, - ctimestamp2, - if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), - if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), - if (cboolean1, ctimestamp1, ctimestamp2), - if (cboolean1, ctimestamp1, null), - if (cboolean1, null, ctimestamp2) + second(ctimestamp1) FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY @@ -340,72 +182,52 @@ POSTHOOK: query: SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1), - cboolean1, - ctimestamp1, - ctimestamp2, - if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), - if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), - if (cboolean1, ctimestamp1, ctimestamp2), - if (cboolean1, ctimestamp1, null), - if (cboolean1, null, ctimestamp2) + second(ctimestamp1) FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### --45479000681 528 10 27 27 43 8 15 18 true 0528-10-27 08:15:18.941718273 NULL 0528-10-27 08:15:18.941718273 2000-12-18 08:42:30.0005 0528-10-27 08:15:18.941718273 0528-10-27 08:15:18.941718273 NULL -1632478712 2021 9 24 24 38 3 18 32 NULL 2021-09-24 03:18:32.4 1974-10-04 17:21:03.989 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 1974-10-04 17:21:03.989 NULL 1974-10-04 17:21:03.989 -1632478712 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 1999-10-03 16:59:10.396903939 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 1999-10-03 16:59:10.396903939 NULL 1999-10-03 16:59:10.396903939 -1632478712 2021 9 24 24 38 
3 18 32 false 2021-09-24 03:18:32.4 2010-04-08 02:43:35.861742727 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 2010-04-08 02:43:35.861742727 NULL 2010-04-08 02:43:35.861742727 -1632478712 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 NULL 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 NULL NULL NULL -163809612024 7160 12 2 2 48 6 0 24 NULL 7160-12-02 06:00:24.81200852 1966-08-16 13:36:50.183 1319-02-02 16:31:57.778 7160-12-02 06:00:24.81200852 1966-08-16 13:36:50.183 NULL 1966-08-16 13:36:50.183 -163809612024 7160 12 2 2 48 6 0 24 NULL 7160-12-02 06:00:24.81200852 NULL 1319-02-02 16:31:57.778 7160-12-02 06:00:24.81200852 NULL NULL NULL -490725011 1985 7 20 20 29 9 30 11 true 1985-07-20 09:30:11 1319-02-02 16:31:57.778 1985-07-20 09:30:11 2000-12-18 08:42:30.0005 1985-07-20 09:30:11 1985-07-20 09:30:11 NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1319-02-02 16:31:57.778 NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:44.028 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:44.028 NULL 1969-12-31 15:59:44.028 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:44.809 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:44.809 NULL 1969-12-31 15:59:44.809 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:45.949 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:45.949 NULL 1969-12-31 15:59:45.949 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:50.531 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:50.531 NULL 1969-12-31 15:59:50.531 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:51.009 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:51.009 NULL 1969-12-31 15:59:51.009 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:53.761 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:53.761 NULL 1969-12-31 15:59:53.761 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:00.905 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:00.905 NULL 1969-12-31 16:00:00.905 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:03.586 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:03.586 NULL 1969-12-31 16:00:03.586 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:05.227 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:05.227 NULL 1969-12-31 16:00:05.227 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:05.535 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:05.535 NULL 1969-12-31 16:00:05.535 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:07.02 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:07.02 NULL 1969-12-31 16:00:07.02 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:07.365 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:07.365 NULL 1969-12-31 16:00:07.365 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:07.517 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:07.517 NULL 1969-12-31 16:00:07.517 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:07.767 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:07.767 NULL 1969-12-31 16:00:07.767 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:08.602 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:08.602 NULL 1969-12-31 16:00:08.602 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:09.938 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:09.938 NULL 1969-12-31 16:00:09.938 -NULL NULL NULL NULL 
NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:14.214 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:14.214 NULL 1969-12-31 16:00:14.214 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:14.783 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:14.783 NULL 1969-12-31 16:00:14.783 -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL NULL 1319-02-02 16:31:57.778 NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL NULL 1319-02-02 16:31:57.778 NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:43.773 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:44.262 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:44.568 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:45.697 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:47.351 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:47.446 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:48.023 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:48.629 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:49.177 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:49.208 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:50.789 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:51.245 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:52.372 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:55.249 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:00.661 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:00.784 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:09.313 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:09.538 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:09.986 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:11.031 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:11.465 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 2024-11-11 16:42:41.101 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL NULL NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL 
NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), @@ -446,7 +268,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -455,9 +277,9 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13] - selectExpressions: VectorUDFUnixTimeStampString(col 2:string) -> 5:bigint, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 7:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 8:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 9:int, VectorUDFWeekOfYearString(col 2:string) -> 10:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 11:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 12:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 13:int - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11] + selectExpressions: VectorUDFUnixTimeStampString(col 1:string) -> 3:bigint, VectorUDFYearString(col 1:string, fieldStart 0, fieldLength 4) -> 4:int, VectorUDFMonthString(col 1:string, fieldStart 5, fieldLength 2) -> 5:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 6:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 7:int, VectorUDFWeekOfYearString(col 1:string) -> 8:int, VectorUDFHourString(col 1:string, fieldStart 11, fieldLength 2) -> 9:int, 
VectorUDFMinuteString(col 1:string, fieldStart 14, fieldLength 2) -> 10:int, VectorUDFSecondString(col 1:string, fieldStart 17, fieldLength 2) -> 11:int + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -466,7 +288,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized Map Vectorization: @@ -486,10 +308,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -531,18 +353,6 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### --2736243926 1883 4 17 17 16 4 14 34 --62018170411 4 9 22 22 39 18 26 29 -1365579826 2013 4 10 10 15 0 43 46 -206731024925 8521 1 16 16 3 20 42 5 -271201265 1978 8 5 5 31 14 41 5 -501208674 1985 11 18 18 47 16 37 54 -501208674 1985 11 18 18 47 16 37 54 -94573848655 4966 12 4 4 49 9 30 55 -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL @@ -623,7 +433,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -632,9 +442,9 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [7, 8, 9, 10, 11, 12, 13, 14, 15] - selectExpressions: LongColEqualLongColumn(col 5:bigint, col 6:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFUnixTimeStampString(col 2:string) -> 6:bigint) -> 7:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 5:int, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int) -> 8:boolean, 
LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 5:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 6:int) -> 9:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 10:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 11:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 5:int, VectorUDFWeekOfYearString(col 2:string) -> 6:int) -> 12:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 5:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 6:int) -> 13:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 5:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 6:int) -> 14:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 5:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 6:int) -> 15:boolean - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13] + selectExpressions: LongColEqualLongColumn(col 3:bigint, col 4:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 0:timestamp) -> 3:bigint, VectorUDFUnixTimeStampString(col 1:string) -> 4:bigint) -> 5:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFYearTimestamp(col 0:timestamp, field YEAR) -> 3:int, VectorUDFYearString(col 1:string, fieldStart 0, fieldLength 4) -> 4:int) -> 6:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFMonthTimestamp(col 0:timestamp, field MONTH) -> 3:int, VectorUDFMonthString(col 1:string, fieldStart 5, fieldLength 2) -> 4:int) -> 7:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 4:int) -> 8:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 4:int) -> 9:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFWeekOfYearTimestamp(col 0:timestamp, field WEEK_OF_YEAR) -> 3:int, VectorUDFWeekOfYearString(col 1:string) -> 4:int) -> 10:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFHourTimestamp(col 0:timestamp, field HOUR_OF_DAY) -> 3:int, VectorUDFHourString(col 1:string, fieldStart 11, fieldLength 2) -> 4:int) -> 11:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFMinuteTimestamp(col 0:timestamp, field MINUTE) -> 3:int, VectorUDFMinuteString(col 1:string, fieldStart 14, fieldLength 2) -> 4:int) -> 12:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFSecondTimestamp(col 0:timestamp, field SECOND) -> 3:int, VectorUDFSecondString(col 1:string, fieldStart 17, fieldLength 2) -> 4:int) -> 13:boolean + Statistics: 
Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -643,7 +453,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized Map Vectorization: @@ -663,10 +473,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -748,18 +558,6 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -false false false false false false false false false -false false false false false false false false false -false false false false false false false false false -false false false false false false false false false PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), @@ -916,7 +714,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -925,12 +723,12 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1] - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0] + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() Group By Vectorization: - aggregators: VectorUDAFMinTimestamp(col 1:timestamp) -> timestamp, 
VectorUDAFMaxTimestamp(col 1:timestamp) -> timestamp, VectorUDAFCount(col 1:timestamp) -> bigint, VectorUDAFCountStar(*) -> bigint + aggregators: VectorUDAFMinTimestamp(col 0:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 0:timestamp) -> timestamp, VectorUDAFCount(col 0:timestamp) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false @@ -1000,7 +798,7 @@ FROM alltypesorc_string POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -0528-10-27 08:15:18.941718273 7160-12-02 06:00:24.81200852 8 52 +NULL NULL 0 40 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string @@ -1023,7 +821,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -1032,12 +830,12 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1] - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0] + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ctimestamp1) Group By Vectorization: - aggregators: VectorUDAFSumTimestamp(col 1:timestamp) -> double + aggregators: VectorUDAFSumTimestamp(col 0:timestamp) -> double className: VectorGroupByOperator groupByMode: HASH native: false @@ -1105,7 +903,7 @@ FROM alltypesorc_string POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -2.89160863229166E11 +NULL PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -1142,7 +940,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -1151,13 +949,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 5, 8] - selectExpressions: CastTimestampToDouble(col 1:timestamp) -> 5:double, DoubleColMultiplyDoubleColumn(col 6:double, col 7:double)(children: CastTimestampToDouble(col 1:timestamp) -> 6:double, CastTimestampToDouble(col 1:timestamp) -> 7:double) -> 8:double - Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0, 3, 6] + selectExpressions: CastTimestampToDouble(col 0:timestamp) -> 3:double, DoubleColMultiplyDoubleColumn(col 4:double, col 5:double)(children: CastTimestampToDouble(col 0:timestamp) -> 4:double, CastTimestampToDouble(col 0:timestamp) -> 5:double) -> 6:double + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumTimestamp(col 1:timestamp) -> double, VectorUDAFCount(col 1:timestamp) -> bigint, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFSumDouble(col 5:double) -> double + aggregators: VectorUDAFSumTimestamp(col 0:timestamp) -> double, VectorUDAFCount(col 0:timestamp) -> 
bigint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 3:double) -> double className: VectorGroupByOperator groupByMode: HASH native: false @@ -1239,4 +1037,4 @@ FROM alltypesorc_string POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -3.6145107904E10 false false false 7.5245155692476E10 7.5245155692476E10 7.5245155692476E10 8.0440455033059E10 +NULL NULL NULL NULL NULL NULL NULL NULL diff --git serde/pom.xml serde/pom.xml index eca34af32d..e005585e4b 100644 --- serde/pom.xml +++ serde/pom.xml @@ -70,11 +70,6 @@ arrow-vector ${arrow.version} - - org.apache.arrow - arrow-vector - ${arrow.version} - com.carrotsearch hppc diff --git service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java index 0b3f2c3168..a02f13cc5e 100644 --- service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java +++ service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java @@ -23,17 +23,14 @@ import java.util.concurrent.SynchronousQueue; import java.util.concurrent.TimeUnit; -import javax.servlet.ServletContextEvent; -import javax.servlet.ServletContextListener; import javax.ws.rs.HttpMethod; -import org.apache.hadoop.hive.common.metrics.common.Metrics; -import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; -import org.apache.hadoop.hive.common.metrics.common.MetricsFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.Shell; import org.apache.hive.service.auth.HiveAuthFactory; import org.apache.hive.service.cli.CLIService; import org.apache.hive.service.rpc.thrift.TCLIService; @@ -156,33 +153,6 @@ public void run() { LOG.warn("XSRF filter disabled"); } - context.addEventListener(new ServletContextListener() { - @Override - public void contextInitialized(ServletContextEvent servletContextEvent) { - Metrics metrics = MetricsFactory.getInstance(); - if (metrics != null) { - try { - metrics.incrementCounter(MetricsConstant.OPEN_CONNECTIONS); - metrics.incrementCounter(MetricsConstant.CUMULATIVE_CONNECTION_COUNT); - } catch (Exception e) { - LOG.warn("Error reporting HS2 open connection operation to Metrics system", e); - } - } - } - - @Override - public void contextDestroyed(ServletContextEvent servletContextEvent) { - Metrics metrics = MetricsFactory.getInstance(); - if (metrics != null) { - try { - metrics.decrementCounter(MetricsConstant.OPEN_CONNECTIONS); - } catch (Exception e) { - LOG.warn("Error reporting HS2 close connection operation to Metrics system", e); - } - } - } - }); - final String httpPath = getHttpPath(hiveConf .getVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_HTTP_PATH)); diff --git spark-client/src/test/java/org/apache/hive/spark/client/rpc/TestRpc.java spark-client/src/test/java/org/apache/hive/spark/client/rpc/TestRpc.java index 5653e4d0c4..fafdff77ea 100644 --- spark-client/src/test/java/org/apache/hive/spark/client/rpc/TestRpc.java +++ spark-client/src/test/java/org/apache/hive/spark/client/rpc/TestRpc.java @@ -197,10 +197,8 @@ public void testServerPort() throws Exception { try { autoClose(new RpcServer(config)); assertTrue("Invalid port range should throw an exception", false); // Should not reach here - } catch(IllegalArgumentException e) { - 
assertEquals( - "Malformed configuration value for " + HiveConf.ConfVars.SPARK_RPC_SERVER_PORT.varname, - e.getMessage()); + } catch(IOException e) { + assertEquals("Incorrect RPC server port configuration for HiveServer2", e.getMessage()); } // Retry logic diff --git standalone-metastore/pom.xml standalone-metastore/pom.xml index 5c536c90fa..c9eec9ddbc 100644 --- standalone-metastore/pom.xml +++ standalone-metastore/pom.xml @@ -399,8 +399,6 @@ Partition.java SerDeInfo.java StorageDescriptor.java - ColumnStatisticsDesc.java - ColumnStatisticsObj.java ${basedir}/src/main/resources/thrift-replacements.txt true diff --git standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatisticsDesc.java standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatisticsDesc.java index 44a57f234f..0e70758786 100644 --- standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatisticsDesc.java +++ standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatisticsDesc.java @@ -166,8 +166,8 @@ public ColumnStatisticsDesc( this(); this.isTblLevel = isTblLevel; setIsTblLevelIsSet(true); - this.dbName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(dbName); - this.tableName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(tableName); + this.dbName = dbName; + this.tableName = tableName; } /** @@ -177,17 +177,17 @@ public ColumnStatisticsDesc(ColumnStatisticsDesc other) { __isset_bitfield = other.__isset_bitfield; this.isTblLevel = other.isTblLevel; if (other.isSetDbName()) { - this.dbName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other.dbName); + this.dbName = other.dbName; } if (other.isSetTableName()) { - this.tableName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other.tableName); + this.tableName = other.tableName; } if (other.isSetPartName()) { this.partName = other.partName; } this.lastAnalyzed = other.lastAnalyzed; if (other.isSetCatName()) { - this.catName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other.catName); + this.catName = other.catName; } } @@ -234,7 +234,7 @@ public String getDbName() { } public void setDbName(String dbName) { - this.dbName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(dbName); + this.dbName = dbName; } public void unsetDbName() { @@ -257,7 +257,7 @@ public String getTableName() { } public void setTableName(String tableName) { - this.tableName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(tableName); + this.tableName = tableName; } public void unsetTableName() { @@ -325,7 +325,7 @@ public String getCatName() { } public void setCatName(String catName) { - this.catName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(catName); + this.catName = catName; } public void unsetCatName() { @@ -750,7 +750,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, ColumnStatisticsDes break; case 2: // DB_NAME if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.dbName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.dbName = iprot.readString(); struct.setDbNameIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -758,7 +758,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, ColumnStatisticsDes break; case 3: // TABLE_NAME if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - 
struct.tableName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.tableName = iprot.readString(); struct.setTableNameIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -782,7 +782,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, ColumnStatisticsDes break; case 6: // CAT_NAME if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.catName = iprot.readString(); struct.setCatNameIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -880,9 +880,9 @@ public void read(org.apache.thrift.protocol.TProtocol prot, ColumnStatisticsDesc TTupleProtocol iprot = (TTupleProtocol) prot; struct.isTblLevel = iprot.readBool(); struct.setIsTblLevelIsSet(true); - struct.dbName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.dbName = iprot.readString(); struct.setDbNameIsSet(true); - struct.tableName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.tableName = iprot.readString(); struct.setTableNameIsSet(true); BitSet incoming = iprot.readBitSet(3); if (incoming.get(0)) { @@ -894,7 +894,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, ColumnStatisticsDesc struct.setLastAnalyzedIsSet(true); } if (incoming.get(2)) { - struct.catName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.catName = iprot.readString(); struct.setCatNameIsSet(true); } } diff --git standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatisticsObj.java standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatisticsObj.java index 6f9a57fe53..9762478b2d 100644 --- standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatisticsObj.java +++ standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatisticsObj.java @@ -139,8 +139,8 @@ public ColumnStatisticsObj( ColumnStatisticsData statsData) { this(); - this.colName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(colName); - this.colType = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(colType); + this.colName = colName; + this.colType = colType; this.statsData = statsData; } @@ -149,10 +149,10 @@ public ColumnStatisticsObj( */ public ColumnStatisticsObj(ColumnStatisticsObj other) { if (other.isSetColName()) { - this.colName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other.colName); + this.colName = other.colName; } if (other.isSetColType()) { - this.colType = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other.colType); + this.colType = other.colType; } if (other.isSetStatsData()) { this.statsData = new ColumnStatisticsData(other.statsData); @@ -175,7 +175,7 @@ public String getColName() { } public void setColName(String colName) { - this.colName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(colName); + this.colName = colName; } public void unsetColName() { @@ -198,7 +198,7 @@ public String getColType() { } public void setColType(String colType) { - this.colType = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(colType); + this.colType = colType; } public void unsetColType() { @@ -503,7 +503,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, 
ColumnStatisticsObj switch (schemeField.id) { case 1: // COL_NAME if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.colName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.colName = iprot.readString(); struct.setColNameIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -511,7 +511,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, ColumnStatisticsObj break; case 2: // COL_TYPE if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.colType = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.colType = iprot.readString(); struct.setColTypeIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -579,9 +579,9 @@ public void write(org.apache.thrift.protocol.TProtocol prot, ColumnStatisticsObj @Override public void read(org.apache.thrift.protocol.TProtocol prot, ColumnStatisticsObj struct) throws org.apache.thrift.TException { TTupleProtocol iprot = (TTupleProtocol) prot; - struct.colName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.colName = iprot.readString(); struct.setColNameIsSet(true); - struct.colType = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.colType = iprot.readString(); struct.setColTypeIsSet(true); struct.statsData = new ColumnStatisticsData(); struct.statsData.read(iprot); diff --git standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/FieldSchema.java standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/FieldSchema.java index 8f00fbdd24..ff634081db 100644 --- standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/FieldSchema.java +++ standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/FieldSchema.java @@ -491,7 +491,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, FieldSchema struct) switch (schemeField.id) { case 1: // NAME if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.name = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.name = iprot.readString(); struct.setNameIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -499,7 +499,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, FieldSchema struct) break; case 2: // TYPE if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.type = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.type = iprot.readString(); struct.setTypeIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -507,7 +507,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, FieldSchema struct) break; case 3: // COMMENT if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.comment = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.comment = iprot.readString(); struct.setCommentIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -585,15 +585,15 @@ public void read(org.apache.thrift.protocol.TProtocol prot, FieldSchema struct) TTupleProtocol iprot = (TTupleProtocol) prot; BitSet incoming = iprot.readBitSet(3); if (incoming.get(0)) { - struct.name = 
org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.name = iprot.readString(); struct.setNameIsSet(true); } if (incoming.get(1)) { - struct.type = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.type = iprot.readString(); struct.setTypeIsSet(true); } if (incoming.get(2)) { - struct.comment = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.comment = iprot.readString(); struct.setCommentIsSet(true); } } diff --git standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java index 51f809a0f8..c58e1cb7d9 100644 --- standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java +++ standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java @@ -231,7 +231,7 @@ public Partition(Partition other) { this.privileges = new PrincipalPrivilegeSet(other.privileges); } if (other.isSetCatName()) { - this.catName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other.catName); + this.catName = other.catName; } } @@ -467,7 +467,7 @@ public String getCatName() { } public void setCatName(String catName) { - this.catName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(catName); + this.catName = catName; } public void unsetCatName() { @@ -1029,7 +1029,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, Partition struct) t break; case 2: // DB_NAME if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.dbName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.dbName = iprot.readString(); struct.setDbNameIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -1037,7 +1037,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, Partition struct) t break; case 3: // TABLE_NAME if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.tableName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.tableName = iprot.readString(); struct.setTableNameIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -1099,7 +1099,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, Partition struct) t break; case 9: // CAT_NAME if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.catName = iprot.readString(); struct.setCatNameIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -1284,11 +1284,11 @@ public void read(org.apache.thrift.protocol.TProtocol prot, Partition struct) th struct.setValuesIsSet(true); } if (incoming.get(1)) { - struct.dbName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.dbName = iprot.readString(); struct.setDbNameIsSet(true); } if (incoming.get(2)) { - struct.tableName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.tableName = iprot.readString(); struct.setTableNameIsSet(true); } if (incoming.get(3)) { @@ -1325,7 +1325,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, Partition struct) th struct.setPrivilegesIsSet(true); } if (incoming.get(8)) { - struct.catName = 
org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.catName = iprot.readString(); struct.setCatNameIsSet(true); } } diff --git standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java index 671c43efb3..71957f79f2 100644 --- standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java +++ standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java @@ -197,10 +197,10 @@ public SerDeInfo(SerDeInfo other) { this.description = other.description; } if (other.isSetSerializerClass()) { - this.serializerClass = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other.serializerClass); + this.serializerClass = other.serializerClass; } if (other.isSetDeserializerClass()) { - this.deserializerClass = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other.deserializerClass); + this.deserializerClass = other.deserializerClass; } if (other.isSetSerdeType()) { this.serdeType = other.serdeType; @@ -330,7 +330,7 @@ public String getSerializerClass() { } public void setSerializerClass(String serializerClass) { - this.serializerClass = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(serializerClass); + this.serializerClass = serializerClass; } public void unsetSerializerClass() { @@ -353,7 +353,7 @@ public String getDeserializerClass() { } public void setDeserializerClass(String deserializerClass) { - this.deserializerClass = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(deserializerClass); + this.deserializerClass = deserializerClass; } public void unsetDeserializerClass() { @@ -842,7 +842,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, SerDeInfo struct) t switch (schemeField.id) { case 1: // NAME if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.name = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.name = iprot.readString(); struct.setNameIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -850,7 +850,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, SerDeInfo struct) t break; case 2: // SERIALIZATION_LIB if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.serializationLib = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.serializationLib = iprot.readString(); struct.setSerializationLibIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -886,7 +886,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, SerDeInfo struct) t break; case 5: // SERIALIZER_CLASS if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.serializerClass = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.serializerClass = iprot.readString(); struct.setSerializerClassIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -894,7 +894,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, SerDeInfo struct) t break; case 6: // DESERIALIZER_CLASS if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.deserializerClass = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.deserializerClass = iprot.readString(); 
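The surrounding hunks strip the org.apache.hadoop.hive.metastore.utils.StringUtils.intern(...) wrappers from the generated Thrift beans, so strings read off the wire (column names and types, db/table/catalog names, serde classes, input/output formats) are once again kept as independent String instances. As a rough illustration of the interning idea those wrappers relied on — a minimal map-based interner, not Hive's actual utility — consider:

// Illustrative sketch only; Hive's StringUtils.intern may be implemented differently.
import java.util.concurrent.ConcurrentHashMap;

public class ExampleInterner {
  private static final ConcurrentHashMap<String, String> POOL = new ConcurrentHashMap<>();

  // Return one canonical instance per distinct string value (null-safe).
  public static String intern(String s) {
    if (s == null) {
      return null;
    }
    String existing = POOL.putIfAbsent(s, s);
    return existing != null ? existing : s;
  }

  public static void main(String[] args) {
    String a = new String("bigint");
    String b = new String("bigint");
    System.out.println(a == b);                  // false: equal but distinct instances
    System.out.println(intern(a) == intern(b));  // true: both resolve to one pooled instance
  }
}

Removing the wrappers trades that deduplication (useful when many partitions repeat the same type and format strings) for one less lookup on every deserialized field.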
struct.setDeserializerClassIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -1047,11 +1047,11 @@ public void read(org.apache.thrift.protocol.TProtocol prot, SerDeInfo struct) th TTupleProtocol iprot = (TTupleProtocol) prot; BitSet incoming = iprot.readBitSet(7); if (incoming.get(0)) { - struct.name = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.name = iprot.readString(); struct.setNameIsSet(true); } if (incoming.get(1)) { - struct.serializationLib = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.serializationLib = iprot.readString(); struct.setSerializationLibIsSet(true); } if (incoming.get(2)) { @@ -1074,11 +1074,11 @@ public void read(org.apache.thrift.protocol.TProtocol prot, SerDeInfo struct) th struct.setDescriptionIsSet(true); } if (incoming.get(4)) { - struct.serializerClass = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.serializerClass = iprot.readString(); struct.setSerializerClassIsSet(true); } if (incoming.get(5)) { - struct.deserializerClass = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.deserializerClass = iprot.readString(); struct.setDeserializerClassIsSet(true); } if (incoming.get(6)) { diff --git standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java index 3cfa765fc1..00e60417ff 100644 --- standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java +++ standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java @@ -1308,7 +1308,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, StorageDescriptor s break; case 2: // LOCATION if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.location = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.location = iprot.readString(); struct.setLocationIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -1316,7 +1316,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, StorageDescriptor s break; case 3: // INPUT_FORMAT if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.inputFormat = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.inputFormat = iprot.readString(); struct.setInputFormatIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -1324,7 +1324,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, StorageDescriptor s break; case 4: // OUTPUT_FORMAT if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.outputFormat = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.outputFormat = iprot.readString(); struct.setOutputFormatIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -1368,7 +1368,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, StorageDescriptor s } iprot.readListEnd(); } - struct.bucketCols = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(struct.bucketCols); struct.setBucketColsIsSet(true); + struct.setBucketColsIsSet(true); } else { 
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -1666,15 +1666,15 @@ public void read(org.apache.thrift.protocol.TProtocol prot, StorageDescriptor st struct.setColsIsSet(true); } if (incoming.get(1)) { - struct.location = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.location = iprot.readString(); struct.setLocationIsSet(true); } if (incoming.get(2)) { - struct.inputFormat = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.inputFormat = iprot.readString(); struct.setInputFormatIsSet(true); } if (incoming.get(3)) { - struct.outputFormat = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString()); + struct.outputFormat = iprot.readString(); struct.setOutputFormatIsSet(true); } if (incoming.get(4)) { @@ -1701,7 +1701,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, StorageDescriptor st struct.bucketCols.add(_elem189); } } - struct.bucketCols = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(struct.bucketCols); struct.setBucketColsIsSet(true); + struct.setBucketColsIsSet(true); } if (incoming.get(8)) { { diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 92d2e3f368..269798c702 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -241,11 +241,6 @@ public TTransport getTransport(TTransport trans) { private final Configuration conf; // stores datastore (jpox) properties, // right now they come from jpox.properties - // Flag to control that always threads are initialized only once - // instead of multiple times - private final static AtomicBoolean alwaysThreadsInitialized = - new AtomicBoolean(false); - private static String currentUrl; private FileMetadataManager fileMetadataManager; private PartitionExpressionProxy expressionProxy; @@ -563,21 +558,19 @@ public void init() throws MetaException { partitionValidationPattern = null; } - // We only initialize once the tasks that need to be run periodically - if (alwaysThreadsInitialized.compareAndSet(false, true)) { - ThreadPool.initialize(conf); - Collection taskNames = - MetastoreConf.getStringCollection(conf, ConfVars.TASK_THREADS_ALWAYS); - for (String taskName : taskNames) { - MetastoreTaskThread task = - JavaUtils.newInstance(JavaUtils.getClass(taskName, MetastoreTaskThread.class)); - task.setConf(conf); - long freq = task.runFrequency(TimeUnit.MILLISECONDS); - // For backwards compatibility, since some threads used to be hard coded but only run if - // frequency was > 0 - if (freq > 0) { - ThreadPool.getPool().scheduleAtFixedRate(task, freq, freq, TimeUnit.MILLISECONDS); - } + ThreadPool.initialize(conf); + Collection taskNames = + MetastoreConf.getStringCollection(conf, ConfVars.TASK_THREADS_ALWAYS); + for (String taskName : taskNames) { + MetastoreTaskThread task = + JavaUtils.newInstance(JavaUtils.getClass(taskName, MetastoreTaskThread.class)); + task.setConf(conf); + long freq = task.runFrequency(TimeUnit.MILLISECONDS); + // For backwards compatibility, since some threads used to be hard coded but only run if + // frequency was > 0 + if (freq > 0) { + ThreadPool.getPool().scheduleAtFixedRate(task, freq, freq, TimeUnit.MILLISECONDS); + } } expressionProxy = PartFilterExprUtil.createExpressionProxy(conf); @@ 
-653,10 +646,6 @@ public void setMetaConf(String key, String value) throws MetaException { setHMSHandler(this); configuration.set(key, value); notifyMetaListeners(key, oldValue, value); - - if (ConfVars.TRY_DIRECT_SQL == confVar) { - HMSHandler.LOG.info("Direct SQL optimization = {}", value); - } } @Override @@ -8897,9 +8886,6 @@ public void processContext(ServerContext serverContext, TTransport tTransport, T HMSHandler.LOG.info("TCP keepalive = " + tcpKeepAlive); HMSHandler.LOG.info("Enable SSL = " + useSSL); - boolean directSqlEnabled = MetastoreConf.getBoolVar(conf, ConfVars.TRY_DIRECT_SQL); - HMSHandler.LOG.info("Direct SQL optimization = {}", directSqlEnabled); - if (startLock != null) { signalOtherThreadsToStart(tServer, startLock, startCondition, startedServing); } diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java index 48f77b9878..56fbfed944 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java @@ -182,7 +182,7 @@ public MetaStoreDirectSql(PersistenceManager pm, Configuration conf, String sche boolean isInTest = MetastoreConf.getBoolVar(conf, ConfVars.HIVE_IN_TEST); isCompatibleDatastore = (!isInTest || ensureDbInit()) && runTestQuery(); if (isCompatibleDatastore) { - LOG.debug("Using direct SQL, underlying DB is " + dbType); + LOG.info("Using direct SQL, underlying DB is " + dbType); } } diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 264fdb9db9..b0a805f047 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -3490,7 +3490,6 @@ public T run(boolean initTable) throws MetaException, NoSuchObjectException { try { directSql.prepareTxn(); this.results = getSqlResult(this); - LOG.debug("Using direct SQL optimization."); } catch (Exception ex) { handleDirectSqlError(ex); } @@ -3500,7 +3499,6 @@ public T run(boolean initTable) throws MetaException, NoSuchObjectException { // 2) DirectSQL threw and was disabled in handleDirectSqlError. 
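The HiveMetaStore.init() hunk above now initializes the ThreadPool unconditionally and schedules every configured MetastoreTaskThread whose run frequency is positive. A rough JDK-level sketch of that scheduling pattern, using a plain ScheduledExecutorService and a hypothetical task in place of Hive's ThreadPool and task classes:

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public class ExampleTaskScheduling {
  public static void main(String[] args) throws InterruptedException {
    ScheduledExecutorService pool = Executors.newScheduledThreadPool(2);

    // Hypothetical periodic task standing in for a MetastoreTaskThread.
    Runnable task = () -> System.out.println("periodic maintenance run");

    long freqMillis = 1000L; // would come from task.runFrequency(TimeUnit.MILLISECONDS)
    if (freqMillis > 0) {    // mirrors the "only run if frequency was > 0" guard in the hunk
      pool.scheduleAtFixedRate(task, freqMillis, freqMillis, TimeUnit.MILLISECONDS);
    }

    Thread.sleep(3500);      // let a few runs fire, then stop
    pool.shutdownNow();
  }
}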
if (!doUseDirectSql) { this.results = getJdoResult(this); - LOG.debug("Not using direct SQL optimization."); } return commit(); } catch (NoSuchObjectException ex) { diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/client/builder/TableBuilder.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/client/builder/TableBuilder.java index fed3dda809..055a46e5e4 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/client/builder/TableBuilder.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/client/builder/TableBuilder.java @@ -27,7 +27,6 @@ import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.PrincipalType; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.metastore.utils.SecurityUtils; @@ -54,7 +53,7 @@ private Map tableParams; private boolean rewriteEnabled, temporary; private Set mvReferencedTables; - private PrincipalType ownerType; + public TableBuilder() { // Set some reasonable defaults @@ -95,11 +94,6 @@ public TableBuilder setOwner(String owner) { return this; } - public TableBuilder setOwnerType(PrincipalType ownerType) { - this.ownerType = ownerType; - return this; - } - public TableBuilder setViewOriginalText(String viewOriginalText) { this.viewOriginalText = viewOriginalText; return this; @@ -191,9 +185,6 @@ public Table build(Configuration conf) throws MetaException { if (tableName == null) { throw new MetaException("You must set the table name"); } - if (ownerType == null) { - ownerType = PrincipalType.USER; - } if (owner == null) { try { owner = SecurityUtils.getUser(); diff --git standalone-metastore/src/main/resources/thrift-replacements.txt standalone-metastore/src/main/resources/thrift-replacements.txt index d985717902..01ee71a5b1 100644 --- standalone-metastore/src/main/resources/thrift-replacements.txt +++ standalone-metastore/src/main/resources/thrift-replacements.txt @@ -35,11 +35,6 @@ this\.inputFormat\ \=\ inputFormat;=this.inputFormat\ \=\ org.apache.hadoop.hive this\.outputFormat\ \=\ outputFormat;=this.outputFormat\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(outputFormat); this\.dbName\ \=\ dbName;=this.dbName\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(dbName); this\.tableName\ \=\ tableName;=this.tableName\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(tableName); -this\.catName\ \=\ catName;=this\.catName\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(catName); -this\.serializerClass\ \=\ serializerClass;=this\.serializerClass\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(serializerClass); -this\.deserializerClass\ \=\ deserializerClass;=this\.deserializerClass\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(deserializerClass); -this\.colName\ \=\ colName;=this\.colName\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(colName); -this\.colType\ \=\ colType;=this\.colType\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(colType); # Fix constructors and setters of List instance fields @@ -60,11 +55,6 @@ this\.inputFormat\ \=\ other\.inputFormat;=this.inputFormat\ \=\ org.apache.hado this\.outputFormat\ \=\ other\.outputFormat;=this.outputFormat\ \=\ 
org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other.outputFormat); this\.dbName\ \=\ other\.dbName;=this.dbName\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other.dbName); this\.tableName\ \=\ other\.tableName;=this.tableName\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other.tableName); -this\.catName\ \=\ other\.catName;=this\.catName\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other.catName); -this\.serializerClass\ \=\ other\.serializerClass;=this\.serializerClass\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other.serializerClass); -this\.deserializerClass\ \=\ other\.deserializerClass;=this\.deserializerClass\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other.deserializerClass); -this\.colName\ \=\ other\.colName;=this\.colName\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other.colName); -this\.colType\ \=\ other\.colType;=this\.colType\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other.colType); __this__parameters_copy_key\ \=\ other_element_key;=__this__parameters_copy_key\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other_element_key); __this__parameters_copy_value\ \=\ other_element_value;=__this__parameters_copy_value\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other_element_value); @@ -77,30 +67,3 @@ this\.parameters\.put\(key,\ val\);=this.parameters.put(org.apache.hadoop.hive.m # Fix the deserialization methods in Partitions.java: intern parameters after it's deserialized struct\.setParametersIsSet\(true\);=struct.parameters\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(struct.parameters);\ struct.setParametersIsSet(true); - -# Fix the StandardScheme read method which deserializes the fields into the thrift objects - -# PartitionStandardScheme - parameters are already interned above -struct\.dbName\ \=\ iprot\.readString\(\);=struct\.dbName\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot\.readString\(\)); -struct\.tableName\ \=\ iprot\.readString\(\);=struct\.tableName\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot\.readString\(\)); -struct\.catName\ \=\ iprot\.readString\(\);=struct\.catName\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot\.readString\(\)); - -# StorageDescriptorStandardScheme - parameters are already interned above -struct\.location\ \=\ iprot\.readString\(\);=struct\.location\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot\.readString\(\)); -struct\.inputFormat\ \=\ iprot\.readString\(\);=struct\.inputFormat\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot\.readString\(\)); -struct\.outputFormat\ \=\ iprot\.readString\(\);=struct\.outputFormat\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot\.readString\(\)); -struct\.setBucketColsIsSet\(true\);=struct\.bucketCols\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(struct.bucketCols);\ struct.setBucketColsIsSet(true); - -# SerDeInfoStandardScheme - parameters are already interned above -struct\.name\ \=\ iprot\.readString\(\);=struct\.name\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot\.readString\(\)); -struct\.serializationLib\ \=\ iprot\.readString\(\);=struct\.serializationLib\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot\.readString\(\)); -struct\.serializerClass\ \=\ iprot\.readString\(\);=struct\.serializerClass\ \=\ 
org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot\.readString\(\)); -struct\.deserializerClass\ \=\ iprot\.readString\(\);=struct\.deserializerClass\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot\.readString\(\)); - -# FieldSchemaStandardScheme - name field gets automatically handled above -struct\.type\ \=\ iprot\.readString\(\);=struct\.type\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot\.readString\(\)); -struct\.comment\ \=\ iprot\.readString\(\);=struct\.comment\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot\.readString\(\)); - -# ColumnStatisticsObjStandardScheme -struct\.colName\ \=\ iprot\.readString\(\);=struct\.colName\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot\.readString\(\)); -struct\.colType\ \=\ iprot\.readString\(\);=struct\.colType\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot\.readString\(\)); \ No newline at end of file diff --git standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestCatalogNonDefaultClient.java standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestCatalogNonDefaultClient.java index 550b107d3d..dfe05e98f0 100644 --- standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestCatalogNonDefaultClient.java +++ standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestCatalogNonDefaultClient.java @@ -30,7 +30,7 @@ * This tests metastore client calls that do not specify a catalog but with the config on the * client set to go to a non-default catalog. */ -public class TestCatalogNonDefaultClient extends NonCatCallsWithCatalog { +public class TestCatalogNonDefaultClient extends TestNonCatCallsWithCatalog { final private String catName = "non_default_catalog"; private String catLocation; diff --git standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestCatalogNonDefaultSvr.java standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestCatalogNonDefaultSvr.java index cf909ac299..13c8723b53 100644 --- standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestCatalogNonDefaultSvr.java +++ standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestCatalogNonDefaultSvr.java @@ -17,10 +17,12 @@ */ package org.apache.hadoop.hive.metastore; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.api.Catalog; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.client.builder.CatalogBuilder; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge; import org.apache.thrift.TException; import org.junit.After; @@ -28,7 +30,7 @@ * This tests metastore client calls that do not specify a catalog but with the config on the * server set to go to a non-default catalog. 
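The thrift-replacements.txt entries deleted in the hunk above are pattern=replacement pairs applied to the generated Thrift Java sources at build time; this revert drops the rules that injected the intern() calls. Purely as an illustration of how one such rule could be applied — the real build step and its escaping conventions may differ — a single regex pass over generated source text might look like:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ExampleThriftReplacement {
  public static void main(String[] args) {
    // One (pattern, replacement) pair in the spirit of the deleted entries.
    String pattern = "struct\\.colName\\s*=\\s*iprot\\.readString\\(\\);";
    String replacement =
        "struct.colName = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(iprot.readString());";

    String generated = "struct.colName = iprot.readString();";
    String rewritten = Pattern.compile(pattern)
        .matcher(generated)
        .replaceAll(Matcher.quoteReplacement(replacement));

    System.out.println(rewritten);
  }
}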
*/ -public class TestCatalogNonDefaultSvr extends NonCatCallsWithCatalog { +public class TestCatalogNonDefaultSvr extends TestNonCatCallsWithCatalog { final private String catName = "non_default_svr_catalog"; private String catLocation; diff --git standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestCatalogOldClient.java standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestCatalogOldClient.java index fc996c8c71..bb57b85d17 100644 --- standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestCatalogOldClient.java +++ standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestCatalogOldClient.java @@ -25,7 +25,7 @@ * This tests calls with an older client, to make sure that if the client supplies no catalog * information the server still does the right thing. I assumes the default catalog */ -public class TestCatalogOldClient extends NonCatCallsWithCatalog { +public class TestCatalogOldClient extends TestNonCatCallsWithCatalog { @Override protected IMetaStoreClient getClient() throws MetaException { diff --git standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/NonCatCallsWithCatalog.java standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestNonCatCallsWithCatalog.java similarity index 99% rename from standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/NonCatCallsWithCatalog.java rename to standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestNonCatCallsWithCatalog.java index 0194178f0f..55ef885aec 100644 --- standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/NonCatCallsWithCatalog.java +++ standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestNonCatCallsWithCatalog.java @@ -66,7 +66,6 @@ import org.junit.After; import org.junit.Assert; import org.junit.Before; -import org.junit.Ignore; import org.junit.Test; import java.io.File; @@ -83,7 +82,7 @@ import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_DATABASE_NAME; -public abstract class NonCatCallsWithCatalog { +public abstract class TestNonCatCallsWithCatalog { private static final String OTHER_DATABASE = "non_cat_other_db"; private Table[] testTables = new Table[6]; diff --git standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestOldSchema.java standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestOldSchema.java index 717c5ee848..49033d3943 100644 --- standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestOldSchema.java +++ standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestOldSchema.java @@ -49,7 +49,6 @@ import org.junit.After; import org.junit.Assert; import org.junit.Before; -import org.junit.Ignore; import org.junit.Test; import org.junit.experimental.categories.Category; import org.slf4j.Logger; @@ -124,7 +123,6 @@ public void tearDown() { /** * Tests partition operations */ - @Ignore("HIVE-19509: Disable tests that are failing continuously") @Test public void testPartitionOps() throws Exception { String dbName = "default"; diff --git standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestStats.java standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestStats.java index 1b01432db4..6cca062268 100644 --- standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestStats.java +++ standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestStats.java @@ -46,7 +46,6 @@ import org.junit.After; import org.junit.Assert; import 
org.junit.Before; -import org.junit.Ignore; import org.junit.Test; import org.junit.experimental.categories.Category; import org.slf4j.Logger; @@ -251,7 +250,6 @@ public void tableInHiveCatalog() throws TException { dropStats(DEFAULT_CATALOG_NAME, dbName, tableName, null, colMap.keySet()); } - @Ignore("HIVE-19509: Disable tests that are failing continuously") @Test public void partitionedTableInHiveCatalog() throws TException { String dbName = "db_part_stats"; @@ -276,7 +274,6 @@ public void tableOtherCatalog() throws TException { dropStats(catName, dbName, tableName, null, colMap.keySet()); } - @Ignore("HIVE-19509: Disable tests that are failing continuously") @Test public void partitionedTableOtherCatalog() throws TException { String catName = "cat_table_stats"; @@ -301,7 +298,6 @@ public void tableDeprecatedCalls() throws TException { dropStats(NO_CAT, dbName, tableName, null, colMap.keySet()); } - @Ignore("HIVE-19509: Disable tests that are failing continuously") @Test public void partitionedTableDeprecatedCalls() throws TException { String dbName = "old_db_part_stats"; diff --git standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java index be9e7c94c4..fe2d7587f6 100644 --- standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java +++ standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java @@ -36,7 +36,6 @@ import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.PrincipalType; import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.SkewedInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; @@ -257,7 +256,6 @@ public void testCreateTableDefaultValues() throws Exception { client.createTable(table); Table createdTable = client.getTable(table.getDbName(), table.getTableName()); - Assert.assertEquals("Comparing OwnerType", PrincipalType.USER, createdTable.getOwnerType()); Assert.assertNull("Comparing OwnerName", createdTable.getOwner()); Assert.assertNotEquals("Comparing CreateTime", 0, createdTable.getCreateTime()); Assert.assertEquals("Comparing LastAccessTime", 0, createdTable.getLastAccessTime()); @@ -1336,7 +1334,6 @@ private Table getTableWithAllParametersSet() throws MetaException { .setDbName(DEFAULT_DATABASE) .setTableName("test_table_with_all_parameters_set") .setCreateTime(100) - .setOwnerType(PrincipalType.ROLE) .setOwner("owner") .setLastAccessTime(200) .addPartCol("part_col", "int", "part col comment") diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java index f8ed7e29e5..bebf7691f7 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java @@ -180,70 +180,6 @@ public String stringifyColumn(int columnNum) { return b.toString(); } - private void appendVectorType(StringBuilder b, ColumnVector cv) { - String colVectorType = null; - if (cv instanceof LongColumnVector) { - colVectorType = "LONG"; - } else if (cv instanceof 
DoubleColumnVector) { - colVectorType = "DOUBLE"; - } else if (cv instanceof BytesColumnVector) { - colVectorType = "BYTES"; - } else if (cv instanceof DecimalColumnVector) { - colVectorType = "DECIMAL"; - } else if (cv instanceof TimestampColumnVector) { - colVectorType = "TIMESTAMP"; - } else if (cv instanceof IntervalDayTimeColumnVector) { - colVectorType = "INTERVAL_DAY_TIME"; - } else if (cv instanceof ListColumnVector) { - colVectorType = "LIST"; - } else if (cv instanceof MapColumnVector) { - colVectorType = "MAP"; - } else if (cv instanceof StructColumnVector) { - colVectorType = "STRUCT"; - } else if (cv instanceof UnionColumnVector) { - colVectorType = "UNION"; - } else { - colVectorType = "Unknown"; - } - b.append(colVectorType); - - if (cv instanceof ListColumnVector) { - ListColumnVector listColumnVector = (ListColumnVector) cv; - b.append("<"); - appendVectorType(b, listColumnVector.child); - b.append(">"); - } else if (cv instanceof MapColumnVector) { - MapColumnVector mapColumnVector = (MapColumnVector) cv; - b.append("<"); - appendVectorType(b, mapColumnVector.keys); - b.append(", "); - appendVectorType(b, mapColumnVector.values); - b.append(">"); - } else if (cv instanceof StructColumnVector) { - StructColumnVector structColumnVector = (StructColumnVector) cv; - b.append("<"); - final int fieldCount = structColumnVector.fields.length; - for (int i = 0; i < fieldCount; i++) { - if (i > 0) { - b.append(", "); - } - appendVectorType(b, structColumnVector.fields[i]); - } - b.append(">"); - } else if (cv instanceof UnionColumnVector) { - UnionColumnVector unionColumnVector = (UnionColumnVector) cv; - b.append("<"); - final int fieldCount = unionColumnVector.fields.length; - for (int i = 0; i < fieldCount; i++) { - if (i > 0) { - b.append(", "); - } - appendVectorType(b, unionColumnVector.fields[i]); - } - b.append(">"); - } - } - public String stringify(String prefix) { if (size == 0) { return ""; @@ -259,10 +195,33 @@ public String stringify(String prefix) { } b.append(projIndex); b.append(":"); - appendVectorType(b, cv); + String colVectorType = null; + if (cv instanceof LongColumnVector) { + colVectorType = "LONG"; + } else if (cv instanceof DoubleColumnVector) { + colVectorType = "DOUBLE"; + } else if (cv instanceof BytesColumnVector) { + colVectorType = "BYTES"; + } else if (cv instanceof DecimalColumnVector) { + colVectorType = "DECIMAL"; + } else if (cv instanceof TimestampColumnVector) { + colVectorType = "TIMESTAMP"; + } else if (cv instanceof IntervalDayTimeColumnVector) { + colVectorType = "INTERVAL_DAY_TIME"; + } else if (cv instanceof ListColumnVector) { + colVectorType = "LIST"; + } else if (cv instanceof MapColumnVector) { + colVectorType = "MAP"; + } else if (cv instanceof StructColumnVector) { + colVectorType = "STRUCT"; + } else if (cv instanceof UnionColumnVector) { + colVectorType = "UNION"; + } else { + colVectorType = "Unknown"; + } + b.append(colVectorType); } b.append('\n'); - b.append(prefix); if (this.selectedInUse) { for (int j = 0; j < size; j++) { diff --git storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java index 3d9f26267e..7bc03ed53a 100644 --- storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java +++ storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java @@ -119,7 +119,7 @@ public void testStringify() throws IOException { byte[] buffer = ("value " + 
r).getBytes(StandardCharsets.UTF_8); y.setRef(r, buffer, 0, buffer.length); } - final String EXPECTED = ("Column vector types: 0:STRUCT<LONG, TIMESTAMP>, 1:BYTES\n" + + final String EXPECTED = ("Column vector types: 0:STRUCT, 1:BYTES\n" + "[[0, 2000-01-01 00:00:01.0], \"value 0\"]\n" + "[[3, 2000-01-01 00:00:02.0], \"value 1\"]\n" + "[[6, 2000-01-01 00:00:03.0], \"value 2\"]\n" + @@ -153,7 +153,7 @@ public void testStringify2() throws IOException { byte[] buffer = ("value " + r).getBytes(StandardCharsets.UTF_8); y.setRef(r, buffer, 0, buffer.length); } - final String EXPECTED = ("Column vector types: 0:STRUCT<LONG, TIMESTAMP>, 1:BYTES\n" + + final String EXPECTED = ("Column vector types: 0:STRUCT, 1:BYTES\n" + "[[0, 2000-01-01 00:00:01], \"value 0\"]\n" + "[[3, 2000-01-01 00:00:02], \"value 1\"]\n" + "[[6, 2000-01-01 00:00:03], \"value 2\"]\n" + diff --git streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java index 0866850556..685e0cc989 100644 --- streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java +++ streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java @@ -64,48 +64,48 @@ private static final String DEFAULT_LINE_DELIMITER_PATTERN = "[\r\n]"; protected HiveConf conf; - protected StreamingConnection conn; + private StreamingConnection conn; protected Table table; - protected List inputColumns; - protected List inputTypes; - protected String fullyQualifiedTableName; - protected Map> updaters = new HashMap<>(); - protected Map partitionPaths = new HashMap<>(); - protected Set addedPartitions = new HashSet<>(); + List inputColumns; + List inputTypes; + private String fullyQualifiedTableName; + private Map> updaters = new HashMap<>(); + private Map partitionPaths = new HashMap<>(); + private Set addedPartitions = new HashSet<>(); // input OI includes table columns + partition columns - protected StructObjectInspector inputRowObjectInspector; + private StructObjectInspector inputRowObjectInspector; // output OI strips off the partition columns and retains other columns - protected ObjectInspector outputRowObjectInspector; - protected List partitionColumns = new ArrayList<>(); - protected ObjectInspector[] partitionObjInspectors = null; - protected StructField[] partitionStructFields = null; - protected Object[] partitionFieldData; - protected ObjectInspector[] bucketObjInspectors = null; - protected StructField[] bucketStructFields = null; - protected Object[] bucketFieldData; - protected List bucketIds = new ArrayList<>(); - protected int totalBuckets; - protected String defaultPartitionName; - protected boolean isBucketed; - protected AcidOutputFormat acidOutputFormat; - protected Long curBatchMinWriteId; - protected Long curBatchMaxWriteId; - protected final String lineDelimiter; - protected HeapMemoryMonitor heapMemoryMonitor; + private ObjectInspector outputRowObjectInspector; + private List partitionColumns = new ArrayList<>(); + private ObjectInspector[] partitionObjInspectors = null; + private StructField[] partitionStructFields = null; + private Object[] partitionFieldData; + private ObjectInspector[] bucketObjInspectors = null; + private StructField[] bucketStructFields = null; + private Object[] bucketFieldData; + private List bucketIds = new ArrayList<>(); + private int totalBuckets; + private String defaultPartitionName; + private boolean isBucketed; + private AcidOutputFormat acidOutputFormat; + private Long curBatchMinWriteId; + private Long curBatchMaxWriteId; + private final String
lineDelimiter; + private HeapMemoryMonitor heapMemoryMonitor; // if low memory canary is set and if records after set canary exceeds threshold, trigger a flush. // This is to avoid getting notified of low memory too often and flushing too often. - protected AtomicBoolean lowMemoryCanary; - protected long ingestSizeBytes = 0; - protected boolean autoFlush; - protected float memoryUsageThreshold; - protected long ingestSizeThreshold; + private AtomicBoolean lowMemoryCanary; + private long ingestSizeBytes = 0; + private boolean autoFlush; + private float memoryUsageThreshold; + private long ingestSizeThreshold; public AbstractRecordWriter(final String lineDelimiter) { this.lineDelimiter = lineDelimiter == null || lineDelimiter.isEmpty() ? DEFAULT_LINE_DELIMITER_PATTERN : lineDelimiter; } - protected static class OrcMemoryPressureMonitor implements HeapMemoryMonitor.Listener { + private static class OrcMemoryPressureMonitor implements HeapMemoryMonitor.Listener { private static final Logger LOG = LoggerFactory.getLogger(OrcMemoryPressureMonitor.class.getName()); private final AtomicBoolean lowMemoryCanary; @@ -128,41 +128,31 @@ public void init(StreamingConnection conn, long minWriteId, long maxWriteId) thr if (conn == null) { throw new StreamingException("Streaming connection cannot be null during record writer initialization"); } - this.conn = conn; - this.curBatchMinWriteId = minWriteId; - this.curBatchMaxWriteId = maxWriteId; - this.conf = conn.getHiveConf(); - this.defaultPartitionName = conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME); - this.table = conn.getTable(); - this.inputColumns = table.getSd().getCols().stream().map(FieldSchema::getName).collect(Collectors.toList()); - this.inputTypes = table.getSd().getCols().stream().map(FieldSchema::getType).collect(Collectors.toList()); - if (conn.isPartitionedTable() && conn.isDynamicPartitioning()) { - this.partitionColumns = table.getPartitionKeys().stream().map(FieldSchema::getName) - .collect(Collectors.toList()); - this.inputColumns.addAll(partitionColumns); - this.inputTypes - .addAll(table.getPartitionKeys().stream().map(FieldSchema::getType).collect(Collectors.toList())); - } - this.fullyQualifiedTableName = Warehouse.getQualifiedName(table.getDbName(), table.getTableName()); - String outFormatName = this.table.getSd().getOutputFormat(); try { + this.conn = conn; + this.curBatchMinWriteId = minWriteId; + this.curBatchMaxWriteId = maxWriteId; + this.conf = conn.getHiveConf(); + this.defaultPartitionName = conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME); + this.table = conn.getTable(); + this.inputColumns = table.getSd().getCols().stream().map(FieldSchema::getName).collect(Collectors.toList()); + this.inputTypes = table.getSd().getCols().stream().map(FieldSchema::getType).collect(Collectors.toList()); + if (conn.isPartitionedTable() && conn.isDynamicPartitioning()) { + this.partitionColumns = table.getPartitionKeys().stream().map(FieldSchema::getName) + .collect(Collectors.toList()); + this.inputColumns.addAll(partitionColumns); + this.inputTypes + .addAll(table.getPartitionKeys().stream().map(FieldSchema::getType).collect(Collectors.toList())); + } + this.fullyQualifiedTableName = Warehouse.getQualifiedName(table.getDbName(), table.getTableName()); + String outFormatName = this.table.getSd().getOutputFormat(); this.acidOutputFormat = (AcidOutputFormat) ReflectionUtils .newInstance(JavaUtils.loadClass(outFormatName), conf); + setupMemoryMonitoring(); } catch (ClassNotFoundException e) { - String shadePrefix = 
conf.getVar(HiveConf.ConfVars.HIVE_CLASSLOADER_SHADE_PREFIX); - if (shadePrefix != null && !shadePrefix.trim().isEmpty()) { - try { - LOG.info("Shade prefix: {} specified. Using as fallback to load {}..", shadePrefix, outFormatName); - this.acidOutputFormat = (AcidOutputFormat) ReflectionUtils - .newInstance(JavaUtils.loadClass(shadePrefix, outFormatName), conf); - } catch (ClassNotFoundException e1) { - throw new StreamingException(e.getMessage(), e); - } - } else { - throw new StreamingException(e.getMessage(), e); - } + throw new StreamingException(e.getMessage(), e); } - setupMemoryMonitoring(); + try { final AbstractSerDe serDe = createSerde(); this.inputRowObjectInspector = (StructObjectInspector) serDe.getObjectInspector(); @@ -179,7 +169,7 @@ public void init(StreamingConnection conn, long minWriteId, long maxWriteId) thr } } - protected void setupMemoryMonitoring() { + private void setupMemoryMonitoring() { this.autoFlush = conf.getBoolVar(HiveConf.ConfVars.HIVE_STREAMING_AUTO_FLUSH_ENABLED); this.memoryUsageThreshold = conf.getFloatVar(HiveConf.ConfVars.HIVE_HEAP_MEMORY_MONITOR_USAGE_THRESHOLD); this.ingestSizeThreshold = conf.getSizeVar(HiveConf.ConfVars.HIVE_STREAMING_AUTO_FLUSH_CHECK_INTERVAL_SIZE); @@ -201,7 +191,7 @@ protected void setupMemoryMonitoring() { } } - protected void prepareBucketingFields() { + private void prepareBucketingFields() { this.isBucketed = table.getSd().getNumBuckets() > 0; // For unbucketed tables we have exactly 1 RecordUpdater (until HIVE-19208) for each AbstractRecordWriter which // ends up writing to a file bucket_000000. @@ -219,7 +209,7 @@ protected void prepareBucketingFields() { } } - protected void preparePartitioningFields() { + private void preparePartitioningFields() { final int numPartitions = table.getPartitionKeys().size(); this.partitionFieldData = new Object[numPartitions]; this.partitionObjInspectors = new ObjectInspector[numPartitions]; @@ -240,12 +230,12 @@ protected void preparePartitioningFields() { /** * used to tag error msgs to provided some breadcrumbs */ - protected String getWatermark(String partition) { + private String getWatermark(String partition) { return partition + " writeIds[" + curBatchMinWriteId + "," + curBatchMaxWriteId + "]"; } // return the column numbers of the bucketed columns - protected List getBucketColIDs(List bucketCols, List cols) { + private List getBucketColIDs(List bucketCols, List cols) { ArrayList result = new ArrayList<>(bucketCols.size()); HashSet bucketSet = new HashSet<>(bucketCols); for (int i = 0; i < cols.size(); i++) { @@ -275,7 +265,7 @@ protected String getWatermark(String partition) { public abstract Object encode(byte[] record) throws SerializationError; // returns the bucket number to which the record belongs to - protected int getBucket(Object row) { + private int getBucket(Object row) { if (!isBucketed) { return 0; } @@ -288,7 +278,7 @@ protected int getBucket(Object row) { ObjectInspectorUtils.getBucketNumberOld(bucketFields, bucketObjInspectors, totalBuckets); } - protected List getPartitionValues(final Object row) { + private List getPartitionValues(final Object row) { if (!conn.isPartitionedTable()) { return null; } @@ -359,7 +349,7 @@ public void close() throws StreamingIOFailure { } } - protected static ObjectInspector[] getObjectInspectorsForBucketedCols(List bucketIds + private static ObjectInspector[] getObjectInspectorsForBucketedCols(List bucketIds , StructObjectInspector recordObjInspector) { ObjectInspector[] result = new ObjectInspector[bucketIds.size()]; @@ -371,14 
+361,14 @@ public void close() throws StreamingIOFailure { return result; } - protected Object[] getBucketFields(Object row) { + private Object[] getBucketFields(Object row) { for (int i = 0; i < bucketIds.size(); i++) { bucketFieldData[i] = inputRowObjectInspector.getStructFieldData(row, bucketStructFields[i]); } return bucketFieldData; } - protected Object[] getPartitionFields(Object row) { + private Object[] getPartitionFields(Object row) { for (int i = 0; i < partitionFieldData.length; i++) { partitionFieldData[i] = inputRowObjectInspector.getStructFieldData(row, partitionStructFields[i]); } @@ -412,7 +402,7 @@ public void write(final long writeId, final byte[] record) throws StreamingExcep } } - protected void checkAutoFlush() throws StreamingIOFailure { + private void checkAutoFlush() throws StreamingIOFailure { if (!autoFlush) { return; } @@ -444,7 +434,7 @@ protected void checkAutoFlush() throws StreamingIOFailure { return addedPartitions; } - protected RecordUpdater createRecordUpdater(final Path partitionPath, int bucketId, Long minWriteId, + private RecordUpdater createRecordUpdater(final Path partitionPath, int bucketId, Long minWriteId, Long maxWriteID) throws IOException { // Initialize table properties from the table parameters. This is required because the table @@ -463,7 +453,7 @@ protected RecordUpdater createRecordUpdater(final Path partitionPath, int bucket .finalDestination(partitionPath)); } - protected RecordUpdater getRecordUpdater(List partitionValues, int bucketId) throws StreamingIOFailure { + private RecordUpdater getRecordUpdater(List partitionValues, int bucketId) throws StreamingIOFailure { RecordUpdater recordUpdater; String key; Path destLocation; @@ -510,7 +500,7 @@ protected RecordUpdater getRecordUpdater(List partitionValues, int bucke return recordUpdater; } - protected List initializeBuckets() { + private List initializeBuckets() { List result = new ArrayList<>(totalBuckets); for (int bucket = 0; bucket < totalBuckets; bucket++) { result.add(bucket, null); //so that get(i) returns null rather than ArrayOutOfBounds @@ -518,7 +508,7 @@ protected RecordUpdater getRecordUpdater(List partitionValues, int bucke return result; } - protected void logStats(final String prefix) { + private void logStats(final String prefix) { int openRecordUpdaters = updaters.values() .stream() .mapToInt(List::size) diff --git streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java index f69721123c..85887b2006 100644 --- streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java +++ streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java @@ -373,7 +373,7 @@ public PartitionInfo createPartitionIfNotExists(final List partitionValu exists = true; } catch (HiveException | TException e) { throw new StreamingException("Unable to creation partition for values: " + partitionValues + " connection: " + - toConnectionInfoString(), e); + toConnectionInfoString()); } return new PartitionInfo(partName, partLocation, exists); } @@ -460,7 +460,7 @@ private void beginNextTransaction() throws StreamingException { } if (currentTransactionBatch.isClosed()) { - throw new StreamingException("Cannot begin next transaction on a closed streaming connection"); + throw new IllegalStateException("Cannot begin next transaction on a closed streaming connection"); } if (currentTransactionBatch.remainingTransactions() == 0) { diff --git 
testutils/ptest2/src/main/java/org/apache/hive/ptest/api/client/JenkinsQueueUtil.java testutils/ptest2/src/main/java/org/apache/hive/ptest/api/client/JenkinsQueueUtil.java new file mode 100644 index 0000000000..f33516447e --- /dev/null +++ testutils/ptest2/src/main/java/org/apache/hive/ptest/api/client/JenkinsQueueUtil.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.ptest.api.client; + +import java.io.IOException; +import java.security.KeyManagementException; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.cli.CommandLine; +import org.apache.http.HttpResponse; +import org.apache.http.StatusLine; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.ssl.SSLContexts; +import org.apache.http.util.EntityUtils; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Lists; + +/** + * Utility class for the Precommit test job queue on Jenkins + */ +public class JenkinsQueueUtil { + + private static final String JSON_ITEMS_FIELD = "items"; + private static final String JSON_TASK_FIELD = "task"; + private static final String JSON_TASK_NAME_FIELD = "name"; + private static final String JSON_PARAMETERS_FIELD = "parameters"; + private static final String JSON_PARAMETER_NAME_FIELD = "name"; + private static final String JSON_PARAMETER_VALUE_FIELD = "value"; + + private static final String JOB_NAME = "PreCommit-HIVE-Build"; + private static final String ISSUE_FIELD_KEY = "ISSUE_NUM"; + private static final String JIRA_KEY_PREFIX = "HIVE-"; + + /** + * Looks up the current queue of the precommit job on a jenkins instance (specified by + * PTestClient.JENKINS_QUEUE_URL), and checks if current Jira is standing in queue already (i.e. 
+ * will be executed in the future too) + * + * @param commandLine PTestClient's command line option values' list + * @return whether or not the Jira specified in the command line can be found in the job queue + */ + public static boolean isJiraAlreadyInQueue(CommandLine commandLine) { + if (!(commandLine.hasOption(PTestClient.JENKINS_QUEUE_URL) && + commandLine.hasOption(PTestClient.JIRA))) { + return false; + } + try { + System.out.println("Checking " + JOB_NAME + " queue..."); + String queueJson = httpGet(commandLine.getOptionValue(PTestClient.JENKINS_QUEUE_URL)); + List jirasInQueue = parseJiras(queueJson); + if (jirasInQueue.size() > 0) { + System.out.println(JOB_NAME + " has the following jira(s) in queue: " + jirasInQueue); + } else { + return false; + } + + String jira = commandLine.getOptionValue(PTestClient.JIRA).replaceAll(JIRA_KEY_PREFIX,""); + if (jirasInQueue.contains(jira)) { + return true; + } + + } catch (IOException e) { + System.err.println("Error checking " + JOB_NAME + " build queue: " + e); + } + return false; + } + + /** + * Parses raw json to produce a list of Jira number strings. + * @param queueJson + * @return + * @throws IOException + */ + private static List parseJiras(String queueJson) throws IOException { + List jirasInQueue = new ArrayList<>(); + ObjectMapper objectMapper = new ObjectMapper(); + JsonNode rootNode = objectMapper.readTree(queueJson); + List items = Lists.newArrayList(rootNode.findValue(JSON_ITEMS_FIELD).iterator()); + for (JsonNode item : items) { + String taskName = item.path(JSON_TASK_FIELD).path(JSON_TASK_NAME_FIELD).asText(); + if (JOB_NAME.equals(taskName)) { + List parameters = Lists.newArrayList(item.findValue(JSON_PARAMETERS_FIELD)); + for (JsonNode parameter : parameters) { + if (ISSUE_FIELD_KEY.equals(parameter.path(JSON_PARAMETER_NAME_FIELD).asText())) { + jirasInQueue.add(parameter.path(JSON_PARAMETER_VALUE_FIELD).asText()); + } + } + } + } + return jirasInQueue; + } + + private static String httpGet(String url) + throws IOException { + + HttpGet request = new HttpGet(url); + try { + CloseableHttpClient httpClient = HttpClientBuilder + .create() + .setSslcontext(SSLContexts.custom().useProtocol("TLSv1.2").build()) + .setRetryHandler(new PTestClient.PTestHttpRequestRetryHandler()) + .build(); + request.addHeader("content-type", "application/json"); + HttpResponse httpResponse = httpClient.execute(request); + StatusLine statusLine = httpResponse.getStatusLine(); + if (statusLine.getStatusCode() != 200) { + throw new IllegalStateException(statusLine.getStatusCode() + " " + statusLine.getReasonPhrase()); + } + String response = EntityUtils.toString(httpResponse.getEntity(), "UTF-8"); + return response; + } catch (NoSuchAlgorithmException | KeyManagementException e) { + e.printStackTrace(); + throw new IOException(e.getMessage()); + } finally { + request.abort(); + } + } + + +} diff --git testutils/ptest2/src/main/java/org/apache/hive/ptest/api/client/PTestClient.java testutils/ptest2/src/main/java/org/apache/hive/ptest/api/client/PTestClient.java index e878e1834a..9970c36054 100644 --- testutils/ptest2/src/main/java/org/apache/hive/ptest/api/client/PTestClient.java +++ testutils/ptest2/src/main/java/org/apache/hive/ptest/api/client/PTestClient.java @@ -81,10 +81,11 @@ private static final String PASSWORD = "password"; private static final String PROFILE = "profile"; private static final String PATCH = "patch"; - private static final String JIRA = "jira"; + public static final String JIRA = "jira"; private static final String OUTPUT_DIR 
= "outputDir"; private static final String TEST_HANDLE = "testHandle"; private static final String CLEAR_LIBRARY_CACHE = "clearLibraryCache"; + public static final String JENKINS_QUEUE_URL = "jenkinsQueueUrl"; private static final int MAX_RETRIES = 10; private final String mApiEndPoint; private final String mLogsEndpoint; @@ -298,6 +299,7 @@ public static void main(String[] args) throws Exception { options.addOption(null, OUTPUT_DIR, true, "Directory to download and save test-results.tar.gz to. (Optional for testStart)"); options.addOption(null, CLEAR_LIBRARY_CACHE, false, "Before starting the test, delete the ivy and maven directories (Optional for testStart)"); options.addOption(null, LOGS_ENDPOINT, true, "URL to get the logs"); + options.addOption(null, JENKINS_QUEUE_URL, true, "URL for quering Jenkins job queue"); CommandLine commandLine = parser.parse(options, args); @@ -320,6 +322,13 @@ public static void main(String[] args) throws Exception { TEST_HANDLE }); + boolean jiraAlreadyInQueue = JenkinsQueueUtil.isJiraAlreadyInQueue(commandLine); + if (jiraAlreadyInQueue) { + System.out.println("Skipping ptest execution, as " + commandLine.getOptionValue(JIRA) + + " is scheduled in " + "queue in " + "the future too."); + System.exit(0); + } + result = client.testStart(commandLine.getOptionValue(PROFILE), commandLine.getOptionValue(TEST_HANDLE), commandLine.getOptionValue(JIRA), commandLine.getOptionValue(PATCH), commandLine.hasOption(CLEAR_LIBRARY_CACHE)); diff --git testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java index 7ab98f6a6c..2015187f74 100644 --- testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java +++ testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java @@ -22,10 +22,8 @@ import java.io.IOException; import java.util.Collection; import java.util.Collections; -import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; @@ -104,7 +102,6 @@ public void execute() throws Throwable { try { int expectedNumHosts = hostExecutors.size(); initalizeHosts(); - resetPerfMetrics(); do { replaceBadHosts(expectedNumHosts); List> results = Lists.newArrayList(); @@ -148,21 +145,10 @@ public void execute() throws Throwable { } } finally { long elapsed = System.currentTimeMillis() - start; - addAggregatePerfMetrics(); logger.info("PERF: exec phase " + TimeUnit.MINUTES.convert(elapsed, TimeUnit.MILLISECONDS) + " minutes"); } } - - public static final String TOTAL_RSYNC_TIME = "TotalRsyncElapsedTime"; - private void addAggregatePerfMetrics() { - long totalRsycTime = 0L; - for (HostExecutor hostExecutor : ImmutableList.copyOf(hostExecutors)) { - totalRsycTime += hostExecutor.getTotalRsyncTimeInMs(); - } - addPerfMetric(TOTAL_RSYNC_TIME, totalRsycTime); - } - private void replaceBadHosts(int expectedNumHosts) throws Exception { Set goodHosts = Sets.newHashSet(); diff --git testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java index 47347ebfd4..3a4fa7f3ce 100644 --- testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java +++ testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java @@ -28,7 +28,6 @@ import 
java.util.concurrent.Callable; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; import com.google.common.base.Stopwatch; import org.apache.commons.lang.StringUtils; @@ -71,7 +70,6 @@ private volatile boolean mShutdown; private int numParallelBatchesProcessed = 0; private int numIsolatedBatchesProcessed = 0; - private AtomicLong totalElapsedTimeInRsync = new AtomicLong(0L); HostExecutor(Host host, String privateKey, ListeningExecutorService executor, SSHCommandExecutor sshCommandExecutor, @@ -141,10 +139,6 @@ void shutdownNow() { boolean isShutdown() { return mShutdown; } - - long getTotalRsyncTimeInMs() { - return totalElapsedTimeInRsync.get(); - } /** * Executes parallel test until the parallel work queue is empty. Then * executes the isolated tests on the host. During each phase if a @@ -317,7 +311,6 @@ RSyncResult copyToDroneFromLocal(Drone drone, String localFile, String remoteFil if(result.getException() != null || result.getExitCode() != 0) { throw new SSHExecutionException(result); } - totalElapsedTimeInRsync.getAndAdd(result.getElapsedTimeInMs()); return result; } /** @@ -387,7 +380,6 @@ RSyncResult copyFromDroneToLocal(Drone drone, String localFile, String remoteFil if(result.getException() != null || result.getExitCode() != Constants.EXIT_CODE_SUCCESS) { throw new SSHExecutionException(result); } - totalElapsedTimeInRsync.getAndAdd(result.getElapsedTimeInMs()); return result; } /** diff --git testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/LocalCommand.java testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/LocalCommand.java index cf9606ef7a..b57320d2ad 100644 --- testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/LocalCommand.java +++ testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/LocalCommand.java @@ -37,7 +37,6 @@ private final StreamReader streamReader; private Integer exitCode; private final int commandId; - private long elapsedTimeInMs; private final Stopwatch stopwatch = Stopwatch.createUnstarted(); public LocalCommand(Logger logger, OutputPolicy outputPolicy, String command) throws IOException { @@ -59,20 +58,12 @@ public int getExitCode() throws InterruptedException { } } - public long getElapsedTimeInMs() throws InterruptedException { - synchronized (process) { - awaitProcessCompletion(); - return elapsedTimeInMs; - } - } - private void awaitProcessCompletion() throws InterruptedException { synchronized (process) { if (exitCode == null) { exitCode = process.waitFor(); if (stopwatch.isRunning()) { stopwatch.stop(); - this.elapsedTimeInMs = stopwatch.elapsed(TimeUnit.MILLISECONDS); logger.info("Finished LocalCommandId={}. 
ElapsedTime(ms)={}", commandId, stopwatch.elapsed( TimeUnit.MILLISECONDS)); diff --git testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/PTest.java testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/PTest.java index 4e6aa6df96..8df5162440 100644 --- testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/PTest.java +++ testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/PTest.java @@ -26,10 +26,8 @@ import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.Set; import java.util.SortedSet; -import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; @@ -76,7 +74,6 @@ .getLogger(PTest.class); - // dummy patch private final TestConfiguration mConfiguration; private final ListeningExecutorService mExecutor; private final Set mAddedTests; @@ -159,8 +156,7 @@ public HostExecutor build(Host host) { } mHostExecutors = new CopyOnWriteArrayList(hostExecutors); mPhases = Lists.newArrayList(); - mPhases.add(new TestCheckPhase(mHostExecutors, localCommandFactory, templateDefaults, - configuration.getPatch(), patchFile, logger, mAddedTests)); + mPhases.add(new TestCheckPhase(mHostExecutors, localCommandFactory, templateDefaults, patchFile, logger, mAddedTests)); mPhases.add(new PrepPhase(mHostExecutors, localCommandFactory, templateDefaults, scratchDir, patchFile, logger)); mPhases.add(new YetusPhase(configuration, mHostExecutors, localCommandFactory, templateDefaults, mExecutionContext.getLocalWorkingDirectory(), scratchDir, logger, logDir, patchFile)); @@ -186,14 +182,6 @@ public int run() { } finally { long elapsedTime = TimeUnit.MINUTES.convert((System.currentTimeMillis() - start), TimeUnit.MILLISECONDS); - Map perfMetrics = phase.getPerfMetrics(); - if (!perfMetrics.isEmpty()) { - mLogger.info("Adding perf metrics for " + phase.getClass().getSimpleName() + " phase"); - for (Entry perfEntry : perfMetrics.entrySet()) { - elapsedTimes.put(phase.getClass().getSimpleName() + "." 
+ perfEntry.getKey(), - TimeUnit.MINUTES.convert(perfEntry.getValue(), TimeUnit.MILLISECONDS)); - } - } elapsedTimes.put(phase.getClass().getSimpleName(), elapsedTime); } } @@ -235,7 +223,7 @@ public int run() { } mLogger.info("Executed " + mExecutedTests.size() + " tests"); for(Map.Entry entry : elapsedTimes.entrySet()) { - mLogger.info(String.format("PERF: %s took %d minutes", entry.getKey(), entry.getValue())); + mLogger.info(String.format("PERF: Phase %s took %d minutes", entry.getKey(), entry.getValue())); } publishJiraComment(error, messages, failedTests, mAddedTests); if(error || !mFailedTests.isEmpty()) { diff --git testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/Phase.java testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/Phase.java index 34c66ce5db..c049d65e43 100644 --- testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/Phase.java +++ testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/Phase.java @@ -20,14 +20,10 @@ import java.io.IOException; import java.util.List; -import java.util.Map; -import java.util.TreeMap; import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; -import com.google.common.collect.Maps; import org.apache.hive.ptest.execution.LocalCommand.CollectLogPolicy; import org.apache.hive.ptest.execution.ssh.NonZeroExitCodeException; import org.apache.hive.ptest.execution.ssh.RemoteCommandResult; @@ -48,7 +44,6 @@ private final LocalCommandFactory localCommandFactory; private final ImmutableMap templateDefaults; protected final Logger logger; - private Map perfMetrics; public Phase(List hostExecutors, LocalCommandFactory localCommandFactory, @@ -58,7 +53,6 @@ public Phase(List hostExecutors, this.localCommandFactory = localCommandFactory; this.templateDefaults = templateDefaults; this.logger = logger; - this.perfMetrics = new ConcurrentHashMap<>(); } public abstract void execute() throws Throwable; @@ -192,16 +186,4 @@ protected void execLocally(String command) protected ImmutableMap getTemplateDefaults() { return templateDefaults; } - - public Map getPerfMetrics() { - return ImmutableMap.copyOf(perfMetrics); - } - - public void addPerfMetric(final String metricKey, long value) { - perfMetrics.put(metricKey, Long.valueOf(value)); - } - - public void resetPerfMetrics() { - perfMetrics = new ConcurrentHashMap<>(); - } } diff --git testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/TestCheckPhase.java testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/TestCheckPhase.java index 831a9099e2..1107dcd70d 100644 --- testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/TestCheckPhase.java +++ testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/TestCheckPhase.java @@ -18,8 +18,6 @@ */ package org.apache.hive.ptest.execution; -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; import com.google.common.collect.ImmutableMap; import org.slf4j.Logger; @@ -28,40 +26,26 @@ import java.io.FileReader; import java.util.List; import java.util.Set; -import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; public class TestCheckPhase extends Phase { private final File mPatchFile; - private final String mPatchURL; private Set modifiedTestFiles; - private static Cache patchUrls = CacheBuilder.newBuilder().expireAfterWrite - (7, TimeUnit.DAYS).maximumSize(10000).build(); private static final Pattern fileNameFromDiff = 
Pattern.compile("[/][^\\s]*"); private static final Pattern javaTest = Pattern.compile("Test.*java"); public TestCheckPhase(List hostExecutors, - LocalCommandFactory localCommandFactory, - ImmutableMap templateDefaults, - String patchUrl, File patchFile, Logger logger, Set modifiedTestFiles) { + LocalCommandFactory localCommandFactory, + ImmutableMap templateDefaults, + File patchFile, Logger logger, Set modifiedTestFiles) { super(hostExecutors, localCommandFactory, templateDefaults, logger); this.mPatchFile = patchFile; - this.mPatchURL = patchUrl; this.modifiedTestFiles = modifiedTestFiles; } @Override public void execute() throws Exception { - if (mPatchURL != null) { - boolean patchUrlWasSeen = patchUrls.asMap().containsKey(mPatchURL); - if (!patchUrlWasSeen) { - patchUrls.put(mPatchURL, true); - } else { - throw new Exception("Patch URL " + mPatchURL + " was found in seen patch url's cache and " + - "a test was probably run already on it. Aborting..."); - } - } if(mPatchFile != null) { logger.info("Reading patchfile " + mPatchFile.getAbsolutePath()); FileReader fr = null; diff --git testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncCommand.java testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncCommand.java index cadf2097f9..fbb1e7934d 100644 --- testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncCommand.java +++ testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncCommand.java @@ -22,7 +22,6 @@ private final RSyncCommandExecutor executor; private final String localFile; private final String remoteFile; - private long elapsedTimeInMs; private RSyncCommand.Type type; public RSyncCommand(RSyncCommandExecutor executor, String privateKey, String user, String host, int instance, @@ -36,23 +35,17 @@ public RSyncCommand(RSyncCommandExecutor executor, String privateKey, public RSyncCommand.Type getType() { return type; } - - public void setElapsedTimeInMs(long timeInMs) { - this.elapsedTimeInMs = timeInMs; - } - public String getLocalFile() { return localFile; } public String getRemoteFile() { return remoteFile; } - @Override public RSyncResult call() { executor.execute(this); return new RSyncResult(getUser(), getHost(), getInstance(), getLocalFile(), getRemoteFile(), - getExitCode(), getException(), getOutput(), getElapsedTimeInMs()); + getExitCode(), getException(), getOutput()); } @Override @@ -62,10 +55,6 @@ public String toString() { + getHost() + ", getInstance()=" + getInstance() + "]"; } - public long getElapsedTimeInMs() { - return elapsedTimeInMs; - } - public static enum Type { FROM_LOCAL(), TO_LOCAL(), diff --git testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncCommandExecutor.java testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncCommandExecutor.java index af06f20c72..cd7bcf9d4d 100644 --- testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncCommandExecutor.java +++ testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncCommandExecutor.java @@ -93,7 +93,6 @@ public void execute(RSyncCommand command) { } } while (!mShutdown && retry); // an error occurred, re-try command.setExitCode(cmd.getExitCode()); - command.setElapsedTimeInMs(cmd.getElapsedTimeInMs()); } catch (IOException e) { command.setException(e); } catch (InterruptedException e) { diff --git testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncResult.java testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncResult.java 
index 12a043515e..ae6bac866e 100644 --- testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncResult.java +++ testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncResult.java @@ -21,14 +21,12 @@ public class RSyncResult extends AbstractSSHResult { private final String localFile; private final String remoteFile; - private final long elapsedTimeInMs; public RSyncResult(String user, String host, int instance, String localFile, String remoteFile, int exitCode, - Exception exception, String output, long elapsedTimeInMs) { + Exception exception, String output) { super(user, host, instance, exitCode, exception, output); this.localFile = localFile; this.remoteFile = remoteFile; - this.elapsedTimeInMs = elapsedTimeInMs; } public String getLocalFile() { return localFile; @@ -36,7 +34,6 @@ public String getLocalFile() { public String getRemoteFile() { return remoteFile; } - public long getElapsedTimeInMs() { return elapsedTimeInMs; } @Override public String toString() { return "RSyncResult [localFile=" + localFile + ", remoteFile=" diff --git testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/MockRSyncCommandExecutor.java testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/MockRSyncCommandExecutor.java index fd4749eedf..3906435422 100644 --- testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/MockRSyncCommandExecutor.java +++ testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/MockRSyncCommandExecutor.java @@ -18,12 +18,10 @@ */ package org.apache.hive.ptest.execution; -import java.security.SecureRandom; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Queue; -import java.util.Random; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -70,8 +68,6 @@ public synchronized void execute(RSyncCommand command) { matchCount.incrementAndGet(); command.setExitCode(queue.remove()); } - //simulating dummy rsync delay of 17 msec - command.setElapsedTimeInMs(17L); } public int getMatchCount() { diff --git testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestExecutionPhase.java testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestExecutionPhase.java index c32ce106ff..24c811e3f1 100644 --- testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestExecutionPhase.java +++ testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestExecutionPhase.java @@ -150,33 +150,6 @@ public void testFailingUnitTest() throws Throwable { Assert.assertEquals(Sets.newHashSet("SomeTest." 
+ QFILENAME + " (batchId=1)"), failedTests); } - @Test - public void testPerfMetrics() throws Throwable { - //when test is successful - setupUnitTest(); - copyTestOutput("SomeTest-success.xml", succeededLogDir, testBatch.getName()); - Phase phase = getPhase(); - phase.execute(); - Assert.assertNotNull("Perf metrics should have been initialized", phase.getPerfMetrics()); - Assert.assertNotNull(ExecutionPhase.TOTAL_RSYNC_TIME + " should have been initialized", - phase.getPerfMetrics().get(ExecutionPhase.TOTAL_RSYNC_TIME)); - Assert.assertTrue("Total Rsync Elapsed time should have been greater than 0", - phase.getPerfMetrics().get(ExecutionPhase.TOTAL_RSYNC_TIME) > 0); - - //when test fails - setupUnitTest(); - sshCommandExecutor.putFailure("bash " + LOCAL_DIR + "/" + HOST + "-" + USER + - "-0/scratch/hiveptest-" + testBatch.getBatchId() + "_" + DRIVER + ".sh", 1); - copyTestOutput("SomeTest-failure.xml", failedLogDir, testBatch.getName()); - phase = getPhase(); - phase.execute(); - Assert.assertNotNull("Perf metrics should have been initialized", phase.getPerfMetrics()); - Assert.assertNotNull(ExecutionPhase.TOTAL_RSYNC_TIME + " should have been initialized", - phase.getPerfMetrics().get(ExecutionPhase.TOTAL_RSYNC_TIME)); - Assert.assertTrue("Total Rsync Elapsed time should have been greater than 0", - phase.getPerfMetrics().get(ExecutionPhase.TOTAL_RSYNC_TIME) > 0); - } - @Test(timeout = 20000) public void testTimedOutUnitTest() throws Throwable { setupUnitTest(3); diff --git testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestTestCheckPhase.java testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestTestCheckPhase.java index de3386abcb..7183ee3e36 100644 --- testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestTestCheckPhase.java +++ testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestTestCheckPhase.java @@ -18,6 +18,7 @@ */ package org.apache.hive.ptest.execution; +import org.approvaltests.Approvals; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -27,10 +28,6 @@ import java.util.HashSet; import java.util.Set; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - public class TestTestCheckPhase extends AbstractTestPhase { private TestCheckPhase phase; @@ -45,7 +42,7 @@ public void testNoTests() throws Exception { File patchFile = new File(url.getFile()); Set addedTests = new HashSet(); phase = new TestCheckPhase(hostExecutors, localCommandFactory, - templateDefaults, url.toString(), patchFile, logger, addedTests); + templateDefaults, patchFile, logger, addedTests); phase.execute(); Assert.assertEquals(addedTests.size(), 0); @@ -58,7 +55,7 @@ public void testJavaTests() throws Exception { File patchFile = new File(url.getFile()); Set addedTests = new HashSet(); phase = new TestCheckPhase(hostExecutors, localCommandFactory, - templateDefaults, url.toString(), patchFile, logger, addedTests); + templateDefaults, patchFile, logger, addedTests); phase.execute(); Assert.assertEquals(addedTests.size(), 3); @@ -73,7 +70,7 @@ public void testQTests() throws Exception { File patchFile = new File(url.getFile()); Set addedTests = new HashSet(); phase = new TestCheckPhase(hostExecutors, localCommandFactory, - templateDefaults, url.toString(), patchFile, logger, addedTests); + templateDefaults, patchFile, logger, addedTests); phase.execute(); Assert.assertEquals(addedTests.size(), 1); @@ -86,32 +83,9 @@ public void testRemoveTest() throws 
Exception { File patchFile = new File(url.getFile()); Set addedTests = new HashSet(); phase = new TestCheckPhase(hostExecutors, localCommandFactory, - templateDefaults, url.toString(), patchFile, logger, addedTests); + templateDefaults, patchFile, logger, addedTests); phase.execute(); Assert.assertEquals(addedTests.size(), 0); } - - @Test - public void testSamePatchMultipleTimes() throws Exception { - int executions = 0; - try { - URL url = this.getClass().getResource("/HIVE-19077.1.patch"); - File patchFile = new File(url.getFile()); - Set addedTests = new HashSet(); - phase = new TestCheckPhase(hostExecutors, localCommandFactory, - templateDefaults, url.toString(), patchFile, logger, addedTests); - phase.execute(); - executions++; - phase = new TestCheckPhase(hostExecutors, localCommandFactory, - templateDefaults, url.toString(), patchFile, logger, addedTests); - phase.execute(); - executions++; - fail("Should've thrown exception"); - } catch (Exception ex) { - assertTrue(ex.getMessage().contains("HIVE-19077.1.patch was found in seen patch url's cache " + - "and a test was probably run already on it. Aborting...")); - } - assertEquals(1, executions); - } } diff --git testutils/ptest2/src/test/resources/HIVE-19077.1.patch testutils/ptest2/src/test/resources/HIVE-19077.1.patch deleted file mode 100644 index cd7b1331ad..0000000000 --- testutils/ptest2/src/test/resources/HIVE-19077.1.patch +++ /dev/null @@ -1,14 +0,0 @@ -diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java -index c076d4e112a7edab2106f11fe6224247887313cf..8bcb464de540eda7c14a8c6783bb19a09071af7b 100644 ---- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java -+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java -@@ -25,7 +25,9 @@ - - import org.apache.hadoop.hive.ql.exec.ColumnInfo; - import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; -+import org.apache.hadoop.hive.ql.exec.FilterOperator; - import org.apache.hadoop.hive.ql.exec.Operator; -+import org.apache.hadoop.hive.ql.exec.OperatorFactory; - import org.apache.hadoop.hive.ql.exec.RowSchema; - import org.apache.hadoop.hive.ql.exec.SelectOperator; - import org.apache.hadoop.hive.ql.exec.UnionOperator; diff --git vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java index b5220a0081..fbb89a958d 100644 --- vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java +++ vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java @@ -1063,15 +1063,6 @@ {"IfExprScalarScalar", "long", "double"}, {"IfExprScalarScalar", "double", "double"}, - {"IfExprObjectColumnColumn", "timestamp"}, - {"IfExprObjectColumnColumn", "interval_day_time"}, - {"IfExprObjectColumnScalar", "timestamp"}, - {"IfExprObjectColumnScalar", "interval_day_time"}, - {"IfExprObjectScalarColumn", "timestamp"}, - {"IfExprObjectScalarColumn", "interval_day_time"}, - {"IfExprObjectScalarScalar", "timestamp"}, - {"IfExprObjectScalarScalar", "interval_day_time"}, - // template, , , , , {"VectorUDAFMinMax", "VectorUDAFMinLong", "long", "<", "min", "_FUNC_(expr) - Returns the minimum value of expr (vectorized, type: long)"}, @@ -1394,12 +1385,6 @@ private void generate() throws Exception { generateIfExprScalarColumn(tdesc); } else if (tdesc[0].equals("IfExprScalarScalar")) { generateIfExprScalarScalar(tdesc); - } else if ( - 
tdesc[0].equals("IfExprObjectColumnColumn") || - tdesc[0].equals("IfExprObjectColumnScalar") || - tdesc[0].equals("IfExprObjectScalarColumn") || - tdesc[0].equals("IfExprObjectScalarScalar")) { - generateIfExprObject(tdesc); } else if (tdesc[0].equals("FilterDecimalColumnCompareDecimalScalar")) { generateFilterDecimalColumnCompareDecimalScalar(tdesc); } else if (tdesc[0].equals("FilterDecimalScalarCompareDecimalColumn")) { @@ -2274,46 +2259,6 @@ private void generateIfExprScalarScalar(String[] tdesc) throws Exception { className, templateString); } - private void generateIfExprObject(String [] tdesc) throws Exception { - String typeName = tdesc[1]; - String objectName; - String scalarType; - String scalarImport; - if (typeName.equals("timestamp")) { - objectName = "Timestamp"; - scalarType = "Timestamp"; - scalarImport = "java.sql.Timestamp"; - } else if (typeName.equals("interval_day_time")) { - objectName = "IntervalDayTime"; - scalarType = "HiveIntervalDayTime"; - scalarImport = "org.apache.hadoop.hive.common.type.HiveIntervalDayTime"; - } else { - objectName = "unknown"; - scalarType = "unknown"; - scalarImport = "unknown"; - } - String classNameSuffix = tdesc[0].substring("IfExprObject".length()); - - String writableType = getOutputWritableType(typeName); - String columnVectorType = getColumnVectorType(typeName); - - String className = "IfExpr" + objectName + classNameSuffix; - - File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); - String templateString = readFile(templateFile); - - templateString = templateString.replaceAll("", className); - templateString = templateString.replaceAll("", scalarType); - templateString = templateString.replaceAll("", scalarImport); - templateString = templateString.replaceAll("", typeName); - templateString = templateString.replaceAll("", objectName); - templateString = templateString.replaceAll("", writableType); - templateString = templateString.replaceAll("", columnVectorType); - - writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, - className, templateString); - } - // template, , , private void generateDecimalColumnUnaryFunc(String [] tdesc) throws Exception { String classNamePrefix = tdesc[1];