diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/mr/HiveAccumuloTableOutputFormat.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/mr/HiveAccumuloTableOutputFormat.java index 5cf008e..ce6da89 100644 --- a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/mr/HiveAccumuloTableOutputFormat.java +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/mr/HiveAccumuloTableOutputFormat.java @@ -22,10 +22,14 @@ import org.apache.accumulo.core.client.mapred.AccumuloOutputFormat; import org.apache.accumulo.core.client.security.tokens.AuthenticationToken; import org.apache.accumulo.core.client.security.tokens.PasswordToken; +import org.apache.accumulo.core.data.Mutation; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters; import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters; +import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordWriter; +import org.apache.hadoop.util.Progressable; import com.google.common.base.Preconditions; @@ -41,6 +45,13 @@ public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException super.checkOutputSpecs(ignored, job); } + @Override + public RecordWriter getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException { + configureAccumuloOutputFormat(job); + + return super.getRecordWriter(ignored, job, name, progress); + } + protected void configureAccumuloOutputFormat(JobConf job) throws IOException { AccumuloConnectionParameters cnxnParams = new AccumuloConnectionParameters(job); @@ -76,16 +87,32 @@ protected void configureAccumuloOutputFormat(JobConf job) throws IOException { protected void setAccumuloConnectorInfo(JobConf conf, String username, AuthenticationToken token) throws AccumuloSecurityException { - AccumuloOutputFormat.setConnectorInfo(conf, username, token); + try { + AccumuloOutputFormat.setConnectorInfo(conf, username, token); + } catch (IllegalStateException e) { + // AccumuloOutputFormat complains if you re-set an already set value. We just don't care. + log.debug("Ignoring exception setting Accumulo Connector instance for user " + username, e); + } } @SuppressWarnings("deprecation") protected void setAccumuloZooKeeperInstance(JobConf conf, String instanceName, String zookeepers) { - AccumuloOutputFormat.setZooKeeperInstance(conf, instanceName, zookeepers); + try { + AccumuloOutputFormat.setZooKeeperInstance(conf, instanceName, zookeepers); + } catch (IllegalStateException ise) { + // AccumuloOutputFormat complains if you re-set an already set value. We just don't care. + log.debug("Ignoring exception setting ZooKeeper instance of " + instanceName + " at " + + zookeepers, ise); + } } protected void setAccumuloMockInstance(JobConf conf, String instanceName) { - AccumuloOutputFormat.setMockInstance(conf, instanceName); + try { + AccumuloOutputFormat.setMockInstance(conf, instanceName); + } catch (IllegalStateException e) { + // AccumuloOutputFormat complains if you re-set an already set value. We just don't care. 
+ log.debug("Ignoring exception setting mock instance of " + instanceName, e); + } } protected void setDefaultAccumuloTableName(JobConf conf, String tableName) { diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloRowSerializer.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloRowSerializer.java index d168012..46c3c1a 100644 --- a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloRowSerializer.java +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloRowSerializer.java @@ -99,9 +99,6 @@ public Mutation serialize(Object obj, ObjectInspector objInspector) throws SerDe // The ObjectInspector for the row ID ObjectInspector fieldObjectInspector = field.getFieldObjectInspector(); - log.info("Serializing rowId with " + value + " in " + field + " using " - + rowIdFactory.getClass()); - // Serialize the row component using the RowIdFactory. In the normal case, this will just // delegate back to the "local" serializeRowId method byte[] data = rowIdFactory.serializeRowId(value, field, output); diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index c452c38..fa71f0e 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -214,8 +214,8 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { PLAN_SERIALIZATION("hive.plan.serialization.format", "kryo", "Query plan format serialization between client and task nodes. \n" + "Two supported values are : kryo and javaXML. Kryo is default."), - SCRATCHDIR("hive.exec.scratchdir", "/tmp/hive", - "HDFS root scratch dir for Hive jobs which gets created with 777 permission. " + + SCRATCHDIR("hive.exec.scratchdir", "/tmp/hive", + "HDFS root scratch dir for Hive jobs which gets created with write all (733) permission. " + "For each connecting user, an HDFS scratch dir: ${hive.exec.scratchdir}/ is created, " + "with ${hive.scratch.dir.permission}."), LOCALSCRATCHDIR("hive.exec.local.scratchdir", @@ -224,7 +224,7 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { DOWNLOADED_RESOURCES_DIR("hive.downloaded.resources.dir", "${system:java.io.tmpdir}" + File.separator + "${hive.session.id}_resources", "Temporary local directory for added resources in the remote file system."), - SCRATCHDIRPERMISSION("hive.scratch.dir.permission", "700", + SCRATCHDIRPERMISSION("hive.scratch.dir.permission", "700", "The permission for the user specific scratch directories that get created."), SUBMITVIACHILD("hive.exec.submitviachild", false, ""), SUBMITLOCALTASKVIACHILD("hive.exec.submit.local.task.via.child", true, @@ -1253,10 +1253,16 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { "This param is to control whether or not only do lock on queries\n" + "that need to execute at least one mapred job."), + // Zookeeper related configs HIVE_ZOOKEEPER_QUORUM("hive.zookeeper.quorum", "", - "The list of ZooKeeper servers to talk to. This is only needed for read/write locks."), + "List of ZooKeeper servers to talk to. This is needed for: " + + "1. Read/write locks - when hive.lock.manager is set to " + + "org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager, " + + "2. When HiveServer2 supports service discovery via Zookeeper."), HIVE_ZOOKEEPER_CLIENT_PORT("hive.zookeeper.client.port", "2181", - "The port of ZooKeeper servers to talk to. 
This is only needed for read/write locks."), + "The port of ZooKeeper servers to talk to. " + + "If the list of Zookeeper servers specified in hive.zookeeper.quorum " + + "does not contain port numbers, this value is used."), HIVE_ZOOKEEPER_SESSION_TIMEOUT("hive.zookeeper.session.timeout", 600*1000, "ZooKeeper client's session timeout. The client is disconnected, and as a result, all locks released, \n" + "if a heartbeat is not sent in the timeout."), @@ -1456,11 +1462,6 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { "If the property is set, the value must be a valid URI (java.net.URI, e.g. \"file:///tmp/my-logging.properties\"), \n" + "which you can then extract a URL from and pass to PropertyConfigurator.configure(URL)."), - // Hive global init file location - HIVE_GLOBAL_INIT_FILE_LOCATION("hive.server2.global.init.file.location", "${env:HIVE_CONF_DIR}", - "The location of HS2 global init file (.hiverc).\n" + - "If the property is reset, the value must be a valid path where the init file is located."), - // prefix used to auto generated column aliases (this should be started with '_') HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL("hive.autogen.columnalias.prefix.label", "_c", "String used as a prefix when auto generating column alias.\n" + @@ -1499,16 +1500,29 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { "table. From 0.12 onwards, they are displayed separately. This flag will let you\n" + "get old behavior, if desired. See, test-case in patch for HIVE-6689."), + // HiveServer2 specific configs HIVE_SERVER2_MAX_START_ATTEMPTS("hive.server2.max.start.attempts", 30L, new RangeValidator(0L, null), - "This number of times HiveServer2 will attempt to start before exiting, sleeping 60 seconds between retries. \n" + - "The default of 30 will keep trying for 30 minutes."), - + "Number of times HiveServer2 will attempt to start before exiting, sleeping 60 seconds " + + "between retries. \n The default of 30 will keep trying for 30 minutes."), + HIVE_SERVER2_SUPPORT_DYNAMIC_SERVICE_DISCOVERY("hive.server2.support.dynamic.service.discovery", false, + "Whether HiveServer2 supports dynamic service discovery for its clients. " + + "To support this, each instance of HiveServer2 currently uses ZooKeeper to register itself " + + "when it is brought up. 
JDBC/ODBC clients should use the ZooKeeper ensemble: " + + "hive.zookeeper.quorum in their connection string."), + HIVE_SERVER2_ZOOKEEPER_NAMESPACE("hive.server2.zookeeper.namespace", "hiveserver2", + "The parent node in ZooKeeper used by HiveServer2 when supporting dynamic service discovery."), + // HiveServer2 global init file location + HIVE_SERVER2_GLOBAL_INIT_FILE_LOCATION("hive.server2.global.init.file.location", "${env:HIVE_CONF_DIR}", + "The location of HS2 global init file (.hiverc).\n" + + "If the property is reset, the value must be a valid path where the init file is located."), HIVE_SERVER2_TRANSPORT_MODE("hive.server2.transport.mode", "binary", new StringSet("binary", "http"), "Transport mode of HiveServer2."), + HIVE_SERVER2_THRIFT_BIND_HOST("hive.server2.thrift.bind.host", "", + "Bind host on which to run the HiveServer2 Thrift service."), // http (over thrift) transport settings HIVE_SERVER2_THRIFT_HTTP_PORT("hive.server2.thrift.http.port", 10001, - "Port number when in HTTP mode."), + "Port number of HiveServer2 Thrift interface when hive.server2.transport.mode is 'http'."), HIVE_SERVER2_THRIFT_HTTP_PATH("hive.server2.thrift.http.path", "cliservice", "Path component of URL endpoint when in HTTP mode."), HIVE_SERVER2_THRIFT_HTTP_MIN_WORKER_THREADS("hive.server2.thrift.http.min.worker.threads", 5, @@ -1525,11 +1539,7 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { // binary transport settings HIVE_SERVER2_THRIFT_PORT("hive.server2.thrift.port", 10000, - "Port number of HiveServer2 Thrift interface.\n" + - "Can be overridden by setting $HIVE_SERVER2_THRIFT_PORT"), - HIVE_SERVER2_THRIFT_BIND_HOST("hive.server2.thrift.bind.host", "", - "Bind host on which to run the HiveServer2 Thrift interface.\n" + - "Can be overridden by setting $HIVE_SERVER2_THRIFT_BIND_HOST"), + "Port number of HiveServer2 Thrift interface when hive.server2.transport.mode is 'binary'."), // hadoop.rpc.protection being set to a higher level than HiveServer2 // does not make sense in most situations. // HiveServer2 ignores hadoop.rpc.protection in favor of hive.server2.thrift.sasl.qop. 
diff --git a/data/files/data_with_escape.txt b/data/files/data_with_escape.txt new file mode 100644 index 0000000..bd9cc6e --- /dev/null +++ b/data/files/data_with_escape.txt @@ -0,0 +1,5 @@ +re\|ading|V\|A|100 +writ\|ing|MD|200 +w\|aiting|\|NC|300 +seein\|g|TN\||400 +runn\|ing|WV|500 diff --git a/data/files/opencsv-data.txt b/data/files/opencsv-data.txt new file mode 100644 index 0000000..7d5968b --- /dev/null +++ b/data/files/opencsv-data.txt @@ -0,0 +1,3 @@ +why hello there,42,3,100,1412341,true,42.43,85.23423424 +another record,98,4,101,9999999,false,99.89,0.00000009 +third record,45,5,102,999999999,true,89.99,0.00000000000009 \ No newline at end of file diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MockLoader.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MockLoader.java index c87b95a..4b52faf 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MockLoader.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MockLoader.java @@ -36,6 +36,7 @@ import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; + import org.apache.pig.LoadFunc; import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit; import org.apache.pig.data.Tuple; diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestE2EScenarios.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestE2EScenarios.java index a4b55c8..fe4c9f7 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestE2EScenarios.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestE2EScenarios.java @@ -23,9 +23,8 @@ import java.util.HashMap; import java.util.Iterator; -import junit.framework.TestCase; - import org.apache.commons.io.FileUtils; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hive.cli.CliSessionState; @@ -42,6 +41,7 @@ import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; + import org.apache.hive.hcatalog.HcatTestUtils; import org.apache.hive.hcatalog.common.HCatConstants; import org.apache.hive.hcatalog.common.HCatContext; @@ -51,12 +51,16 @@ import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat; import org.apache.hive.hcatalog.mapreduce.OutputJobInfo; import org.apache.hive.hcatalog.mapreduce.HCatMapRedUtil; + import org.apache.pig.ExecType; import org.apache.pig.PigServer; import org.apache.pig.data.Tuple; -public class TestE2EScenarios extends TestCase { +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +public class TestE2EScenarios { private static final String TEST_DATA_DIR = System.getProperty("java.io.tmpdir") + File.separator + TestHCatLoader.class.getCanonicalName() + "-" + System.currentTimeMillis(); private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; @@ -69,9 +73,8 @@ protected String storageFormat() { return "orc"; } - @Override - protected void setUp() throws Exception { - + @Before + public void setUp() throws Exception { File f = new File(TEST_WAREHOUSE_DIR); if (f.exists()) { FileUtil.fullyDelete(f); @@ -90,8 +93,8 @@ protected void setUp() throws Exception { } - @Override - protected void tearDown() throws Exception { + @After + public void 
tearDown() throws Exception { try { dropTable("inpy"); dropTable("rc5318"); @@ -146,16 +149,13 @@ private void pigDump(String tableName) throws IOException { System.err.println("==="); } - private void copyTable(String in, String out) throws IOException, InterruptedException { Job ijob = new Job(); Job ojob = new Job(); HCatInputFormat inpy = new HCatInputFormat(); inpy.setInput(ijob , null, in); HCatOutputFormat oupy = new HCatOutputFormat(); - oupy.setOutput(ojob, - OutputJobInfo.create(null, out, new HashMap() - )); + oupy.setOutput(ojob, OutputJobInfo.create(null, out, new HashMap())); // Test HCatContext @@ -207,6 +207,7 @@ private TaskAttemptContext createTaskAttemptContext(Configuration tconf) { } + @Test public void testReadOrcAndRCFromPig() throws Exception { String tableSchema = "ti tinyint, si smallint,i int, bi bigint, f float, d double, b boolean"; @@ -224,15 +225,14 @@ public void testReadOrcAndRCFromPig() throws Exception { driverRun("LOAD DATA LOCAL INPATH '"+TEXTFILE_LOCN+"' OVERWRITE INTO TABLE inpy"); // write it out from hive to an rcfile table, and to an orc table -// driverRun("insert overwrite table rc5318 select * from inpy"); + //driverRun("insert overwrite table rc5318 select * from inpy"); copyTable("inpy","rc5318"); -// driverRun("insert overwrite table orc5318 select * from inpy"); + //driverRun("insert overwrite table orc5318 select * from inpy"); copyTable("inpy","orc5318"); pigDump("inpy"); pigDump("rc5318"); pigDump("orc5318"); - } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java index 82fc8a9..ee3e750 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java @@ -34,6 +34,7 @@ import java.util.Properties; import org.apache.commons.io.FileUtils; + import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; @@ -45,11 +46,13 @@ import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.mapreduce.Job; + import org.apache.hive.hcatalog.HcatTestUtils; import org.apache.hive.hcatalog.common.HCatUtil; import org.apache.hive.hcatalog.common.HCatConstants; import org.apache.hive.hcatalog.data.Pair; import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; + import org.apache.pig.ExecType; import org.apache.pig.PigServer; import org.apache.pig.ResourceStatistics; @@ -60,10 +63,13 @@ import org.apache.pig.PigRunner; import org.apache.pig.tools.pigstats.OutputStats; import org.apache.pig.tools.pigstats.PigStats; + import org.joda.time.DateTime; + import org.junit.After; import org.junit.Before; import org.junit.Test; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -72,7 +78,7 @@ public class TestHCatLoader { private static final Logger LOG = LoggerFactory.getLogger(TestHCatLoader.class); private static final String TEST_DATA_DIR = HCatUtil.makePathASafeFileName(System.getProperty("java.io.tmpdir") + - File.separator + TestHCatLoader.class.getCanonicalName() + "-" + System.currentTimeMillis()); + File.separator + TestHCatLoader.class.getCanonicalName() + "-" + System.currentTimeMillis()); private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; private static final String BASIC_FILE_NAME = TEST_DATA_DIR + 
"/basic.input.data"; private static final String COMPLEX_FILE_NAME = TEST_DATA_DIR + "/complex.input.data"; @@ -93,6 +99,7 @@ protected String storageFormat() { private void dropTable(String tablename) throws IOException, CommandNeedRetryException { dropTable(tablename, driver); } + static void dropTable(String tablename, Driver driver) throws IOException, CommandNeedRetryException { driver.run("drop table if exists " + tablename); } @@ -100,7 +107,8 @@ static void dropTable(String tablename, Driver driver) throws IOException, Comma private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { createTable(tablename, schema, partitionedBy, driver, storageFormat()); } - static void createTable(String tablename, String schema, String partitionedBy, Driver driver, String storageFormat) + + static void createTable(String tablename, String schema, String partitionedBy, Driver driver, String storageFormat) throws IOException, CommandNeedRetryException { String createTable; createTable = "create table " + tablename + "(" + schema + ") "; @@ -114,6 +122,7 @@ static void createTable(String tablename, String schema, String partitionedBy, D private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { createTable(tablename, schema, null); } + /** * Execute Hive CLI statement * @param cmd arbitrary statement to execute @@ -125,20 +134,20 @@ static void executeStatementOnDriver(String cmd, Driver driver) throws IOExcepti throw new IOException("Failed to execute \"" + cmd + "\". Driver returned " + cpr.getResponseCode() + " Error: " + cpr.getErrorMessage()); } } + private static void checkProjection(FieldSchema fs, String expectedName, byte expectedPigType) { assertEquals(fs.alias, expectedName); assertEquals("Expected " + DataType.findTypeName(expectedPigType) + "; got " + DataType.findTypeName(fs.type), expectedPigType, fs.type); } - + @Before public void setup() throws Exception { - File f = new File(TEST_WAREHOUSE_DIR); if (f.exists()) { FileUtil.fullyDelete(f); } - if(!(new File(TEST_WAREHOUSE_DIR).mkdirs())) { + if (!(new File(TEST_WAREHOUSE_DIR).mkdirs())) { throw new RuntimeException("Could not create " + TEST_WAREHOUSE_DIR); } @@ -192,7 +201,7 @@ public void setup() throws Exception { server.registerQuery("B = foreach A generate a,b;", ++i); server.registerQuery("B2 = filter B by a < 2;", ++i); server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=0');", ++i); - + server.registerQuery("C = foreach A generate a,b;", ++i); server.registerQuery("C2 = filter C by a >= 2;", ++i); server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=1');", ++i); @@ -470,7 +479,7 @@ public void testColumnarStorePushdown() throws Exception { { fs.delete(new Path(PIGOUTPUT_DIR), true); } - }finally { + } finally { new File(PIG_FILE).delete(); } } @@ -534,7 +543,7 @@ public void testConvertBooleanToInt() throws Exception { } /** - * basic tests that cover each scalar type + * basic tests that cover each scalar type * https://issues.apache.org/jira/browse/HIVE-5814 */ private static final class AllTypesTable { diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java index eadbf20..40ec597 100644 --- 
a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.ql.session.SessionState; + import org.apache.pig.ExecType; import org.apache.pig.PigServer; import org.apache.pig.backend.executionengine.ExecException; @@ -44,8 +45,10 @@ import org.apache.pig.impl.logicalLayer.FrontendException; import org.apache.pig.impl.logicalLayer.schema.Schema; import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; + import org.junit.BeforeClass; import org.junit.Test; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -225,7 +228,7 @@ private void verifyWriteRead(String tablename, String pigSchema, String tableSch dropTable(tablename); } } - + private void compareTuples(Tuple t1, Tuple t2) throws ExecException { Assert.assertEquals("Tuple Sizes don't match", t1.size(), t2.size()); for (int i = 0; i < t1.size(); i++) { @@ -237,7 +240,7 @@ private void compareTuples(Tuple t1, Tuple t2) throws ExecException { Assert.assertEquals(msg, noOrder(f1.toString()), noOrder(f2.toString())); } } - + private String noOrder(String s) { char[] chars = s.toCharArray(); Arrays.sort(chars); diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java index fcfc642..763af9f 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java @@ -31,8 +31,10 @@ import org.apache.hadoop.hive.ql.CommandNeedRetryException; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; + import org.apache.hive.hcatalog.HcatTestUtils; import org.apache.hive.hcatalog.mapreduce.HCatBaseTest; + import org.apache.pig.EvalFunc; import org.apache.pig.ExecType; import org.apache.pig.PigException; @@ -41,10 +43,13 @@ import org.apache.pig.data.Tuple; import org.apache.pig.impl.logicalLayer.FrontendException; import org.apache.pig.impl.util.LogUtils; + import org.joda.time.DateTime; import org.joda.time.DateTimeZone; + import org.junit.Assert; import org.junit.Test; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -63,6 +68,7 @@ public void testWriteTinyint() throws Exception { pigValueRangeTestOverflow("junitTypeTest3", "tinyint", "int", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, Integer.toString(300)); } + @Test public void testWriteSmallint() throws Exception { pigValueRangeTest("junitTypeTest1", "smallint", "int", null, Integer.toString(Short.MIN_VALUE), @@ -72,6 +78,7 @@ public void testWriteSmallint() throws Exception { pigValueRangeTestOverflow("junitTypeTest3", "smallint", "int", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, Integer.toString(Short.MAX_VALUE + 1)); } + @Test public void testWriteChar() throws Exception { pigValueRangeTest("junitTypeTest1", "char(5)", "chararray", null, "xxx", "xxx "); @@ -81,6 +88,7 @@ public void testWriteChar() throws Exception { pigValueRangeTestOverflow("junitTypeTest3", "char(5)", "chararray", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, "too_long2"); } + @Test public void testWriteVarchar() throws Exception { pigValueRangeTest("junitTypeTest1", "varchar(5)", "chararray", 
null, "xxx", "xxx"); @@ -90,6 +98,7 @@ public void testWriteVarchar() throws Exception { pigValueRangeTestOverflow("junitTypeTest3", "varchar(5)", "chararray", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, "too_long2"); } + @Test public void testWriteDecimalXY() throws Exception { pigValueRangeTest("junitTypeTest1", "decimal(5,2)", "bigdecimal", null, BigDecimal.valueOf(1.2).toString(), @@ -100,6 +109,7 @@ public void testWriteDecimalXY() throws Exception { pigValueRangeTestOverflow("junitTypeTest3", "decimal(5,2)", "bigdecimal", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, BigDecimal.valueOf(500.123).toString()); } + @Test public void testWriteDecimalX() throws Exception { //interestingly decimal(2) means decimal(2,0) @@ -110,6 +120,7 @@ public void testWriteDecimalX() throws Exception { pigValueRangeTestOverflow("junitTypeTest3", "decimal(2)", "bigdecimal", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, BigDecimal.valueOf(50.123).toString()); } + @Test public void testWriteDecimal() throws Exception { //decimal means decimal(10,0) @@ -120,9 +131,10 @@ public void testWriteDecimal() throws Exception { pigValueRangeTestOverflow("junitTypeTest3", "decimal", "bigdecimal", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, BigDecimal.valueOf(12345678900L).toString()); } + /** * because we want to ignore TZ which is included in toString() - * include time to make sure it's 0 + * include time to make sure it's 0 */ private static final String FORMAT_4_DATE = "yyyy-MM-dd HH:mm:ss"; @Test @@ -142,6 +154,7 @@ public void testWriteDate() throws Exception { pigValueRangeTestOverflow("junitTypeTest6", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, d.plusMinutes(1).toString(), FORMAT_4_DATE);//date out of range due to time!=0 } + @Test public void testWriteDate3() throws Exception { DateTime d = new DateTime(1991,10,11,23,10,DateTimeZone.forOffsetHours(-11)); @@ -154,6 +167,7 @@ public void testWriteDate3() throws Exception { pigValueRangeTestOverflow("junitTypeTest6", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, d.plusMinutes(1).toString(), FORMAT_4_DATE); } + @Test public void testWriteDate2() throws Exception { DateTime d = new DateTime(1991,11,12,0,0, DateTimeZone.forID("US/Eastern")); @@ -168,46 +182,48 @@ public void testWriteDate2() throws Exception { pigValueRangeTestOverflow("junitTypeTest3", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, d.plusMinutes(1).toString(), FORMAT_4_DATE); } + /** - * Note that the value that comes back from Hive will have local TZ on it. Using local is + * Note that the value that comes back from Hive will have local TZ on it. Using local is * arbitrary but DateTime needs TZ (or will assume default) and Hive does not have TZ. * So if you start with Pig value in TZ=x and write to Hive, when you read it back the TZ may * be different. The millis value should match, of course. 
- * + * * @throws Exception */ @Test public void testWriteTimestamp() throws Exception { DateTime d = new DateTime(1991,10,11,14,23,30, 10);//uses default TZ - pigValueRangeTest("junitTypeTest1", "timestamp", "datetime", null, d.toString(), + pigValueRangeTest("junitTypeTest1", "timestamp", "datetime", null, d.toString(), d.toDateTime(DateTimeZone.getDefault()).toString()); d = d.plusHours(2); pigValueRangeTest("junitTypeTest2", "timestamp", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, d.toString(), d.toDateTime(DateTimeZone.getDefault()).toString()); d = d.toDateTime(DateTimeZone.UTC); - pigValueRangeTest("junitTypeTest3", "timestamp", "datetime", null, d.toString(), + pigValueRangeTest("junitTypeTest3", "timestamp", "datetime", null, d.toString(), d.toDateTime(DateTimeZone.getDefault()).toString()); d = new DateTime(1991,10,11,23,24,25, 26); - pigValueRangeTest("junitTypeTest1", "timestamp", "datetime", null, d.toString(), + pigValueRangeTest("junitTypeTest1", "timestamp", "datetime", null, d.toString(), d.toDateTime(DateTimeZone.getDefault()).toString()); d = d.toDateTime(DateTimeZone.UTC); - pigValueRangeTest("junitTypeTest3", "timestamp", "datetime", null, d.toString(), + pigValueRangeTest("junitTypeTest3", "timestamp", "datetime", null, d.toString(), d.toDateTime(DateTimeZone.getDefault()).toString()); } //End: tests that check values from Pig that are out of range for target column - private void pigValueRangeTestOverflow(String tblName, String hiveType, String pigType, HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue, String format) throws Exception { pigValueRangeTest(tblName, hiveType, pigType, goal, inputValue, null, format); } + private void pigValueRangeTestOverflow(String tblName, String hiveType, String pigType, HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue) throws Exception { pigValueRangeTest(tblName, hiveType, pigType, goal, inputValue, null, null); } + private void pigValueRangeTest(String tblName, String hiveType, String pigType, - HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue, + HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue, String expectedValue) throws Exception { pigValueRangeTest(tblName, hiveType, pigType, goal, inputValue, expectedValue, null); } @@ -218,6 +234,7 @@ private void pigValueRangeTest(String tblName, String hiveType, String pigType, String getStorageFormat() { return "RCFILE"; } + /** * This is used to test how Pig values of various data types which are out of range for Hive target * column are handled. Currently the options are to raise an error or write NULL. @@ -236,7 +253,7 @@ String getStorageFormat() { * @param format date format to use for comparison of values since default DateTime.toString() * includes TZ which is meaningless for Hive DATE type */ - private void pigValueRangeTest(String tblName, String hiveType, String pigType, + private void pigValueRangeTest(String tblName, String hiveType, String pigType, HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue, String expectedValue, String format) throws Exception { TestHCatLoader.dropTable(tblName, driver); @@ -309,6 +326,7 @@ private void pigValueRangeTest(String tblName, String hiveType, String pigType, Unfortunately Timestamp.toString() adjusts the value for local TZ and 't' is a String thus the timestamp in 't' doesn't match rawData*/ } + /** * Create a data file with datatypes added in 0.13. Read it with Pig and use * Pig + HCatStorer to write to a Hive table. 
Then read it using Pig and Hive @@ -365,6 +383,7 @@ public void testDateCharTypes() throws Exception { } Assert.assertEquals("Expected " + NUM_ROWS + " rows; got " + numRowsRead + " file=" + INPUT_FILE_NAME, NUM_ROWS, numRowsRead); } + static void dumpFile(String fileName) throws Exception { File file = new File(fileName); BufferedReader reader = new BufferedReader(new FileReader(file)); @@ -375,6 +394,7 @@ static void dumpFile(String fileName) throws Exception { } reader.close(); } + @Test public void testPartColsInData() throws IOException, CommandNeedRetryException { diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerMulti.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerMulti.java index 76080f7..9679d3c 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerMulti.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerMulti.java @@ -25,20 +25,26 @@ import java.util.HashMap; import java.util.Map; -import junit.framework.TestCase; - import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hive.cli.CliSessionState; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CommandNeedRetryException; import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.session.SessionState; + import org.apache.hive.hcatalog.common.HCatUtil; import org.apache.hive.hcatalog.data.Pair; + import org.apache.pig.ExecType; import org.apache.pig.PigServer; -public class TestHCatStorerMulti extends TestCase { +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class TestHCatStorerMulti { public static final String TEST_DATA_DIR = HCatUtil.makePathASafeFileName( System.getProperty("user.dir") + "/build/test/data/" + TestHCatStorerMulti.class.getCanonicalName() + "-" + System.currentTimeMillis()); @@ -77,8 +83,8 @@ private void createTable(String tablename, String schema) throws IOException, Co createTable(tablename, schema, null); } - @Override - protected void setUp() throws Exception { + @Before + public void setUp() throws Exception { if (driver == null) { HiveConf hiveConf = new HiveConf(this.getClass()); hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); @@ -92,14 +98,13 @@ protected void setUp() throws Exception { cleanup(); } - @Override - protected void tearDown() throws Exception { + @After + public void tearDown() throws Exception { cleanup(); } + @Test public void testStoreBasicTable() throws Exception { - - createTable(BASIC_TABLE, "a int, b string"); populateBasicFile(); @@ -117,6 +122,7 @@ public void testStoreBasicTable() throws Exception { assertEquals(basicInputData.size(), unpartitionedTableValuesReadFromHiveDriver.size()); } + @Test public void testStorePartitionedTable() throws Exception { createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); @@ -139,9 +145,8 @@ public void testStorePartitionedTable() throws Exception { assertEquals(basicInputData.size(), partitionedTableValuesReadFromHiveDriver.size()); } + @Test public void testStoreTableMulti() throws Exception { - - createTable(BASIC_TABLE, "a int, b string"); createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerWrapper.java 
b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerWrapper.java index 7f0bca7..2cf14aa 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerWrapper.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerWrapper.java @@ -25,10 +25,13 @@ import java.util.UUID; import org.apache.hadoop.hive.ql.CommandNeedRetryException; + import org.apache.hive.hcatalog.HcatTestUtils; import org.apache.hive.hcatalog.mapreduce.HCatBaseTest; + import org.apache.pig.ExecType; import org.apache.pig.PigServer; + import org.junit.Assert; import org.junit.Test; diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatPigStorer.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatPigStorer.java deleted file mode 100644 index a9b4521..0000000 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatPigStorer.java +++ /dev/null @@ -1,31 +0,0 @@ -package org.apache.hive.hcatalog.pig; - -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.junit.Ignore; -import org.junit.Test; - -import java.io.IOException; - -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -public class TestOrcHCatPigStorer extends TestHCatStorer { - @Override String getStorageFormat() { - return "ORC"; - } -} diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorer.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorer.java index 1084092..65769b4 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorer.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorer.java @@ -18,11 +18,15 @@ */ package org.apache.hive.hcatalog.pig; -public class TestOrcHCatStorer extends TestHCatStorerMulti { +import java.io.IOException; - @Override - protected String storageFormat() { - return "orc"; +import org.apache.hadoop.hive.ql.CommandNeedRetryException; + +import org.junit.Ignore; +import org.junit.Test; + +public class TestOrcHCatStorer extends TestHCatStorer { + @Override String getStorageFormat() { + return "ORC"; } } - diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorerMulti.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorerMulti.java new file mode 100644 index 0000000..77c7979 --- /dev/null +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorerMulti.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.pig; + +public class TestOrcHCatStorerMulti extends TestHCatStorerMulti { + + @Override + protected String storageFormat() { + return "orc"; + } +} + diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigHCatUtil.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigHCatUtil.java index a8ce61a..ca952e7 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigHCatUtil.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigHCatUtil.java @@ -20,14 +20,18 @@ package org.apache.hive.hcatalog.pig; import com.google.common.collect.Lists; + import junit.framework.Assert; + import org.apache.hive.hcatalog.common.HCatConstants; import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; import org.apache.hive.hcatalog.data.schema.HCatSchema; + import org.apache.pig.ResourceSchema; import org.apache.pig.ResourceSchema.ResourceFieldSchema; import org.apache.pig.data.DataType; import org.apache.pig.impl.util.UDFContext; + import org.junit.Test; public class TestPigHCatUtil { diff --git a/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTempletonUtils.java b/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTempletonUtils.java index af952f2..b98e8c5 100644 --- a/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTempletonUtils.java +++ b/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTempletonUtils.java @@ -24,6 +24,7 @@ import java.io.IOException; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hive.shims.HadoopShimsSecure; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.util.StringUtils; @@ -314,9 +315,9 @@ public void testPropertiesParsing() throws Exception { @Test public void testFindContainingJar() throws Exception { - String result = TempletonUtils.findContainingJar(ShimLoader.class, ".*hive-shims.*"); + String result = TempletonUtils.findContainingJar(Configuration.class, ".*hadoop.*\\.jar.*"); Assert.assertNotNull(result); - result = TempletonUtils.findContainingJar(HadoopShimsSecure.class, ".*hive-shims.*"); + result = TempletonUtils.findContainingJar(FileSystem.class, ".*hadoop.*\\.jar.*"); Assert.assertNotNull(result); result = TempletonUtils.findContainingJar(HadoopShimsSecure.class, ".*unknownjar.*"); Assert.assertNull(result); diff --git a/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/ql/security/TestExtendedAcls.java b/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/ql/security/TestExtendedAcls.java index fc5bb8a..d846a63 100644 --- a/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/ql/security/TestExtendedAcls.java +++ b/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/ql/security/TestExtendedAcls.java @@ -34,6 +34,7 @@ import org.junit.Assert; import org.junit.BeforeClass; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; public class TestExtendedAcls extends FolderPermissionBase { @@ -46,7 +47,7 @@ public static void setup() throws Exception { baseSetup(); } - List aclSpec1 = Lists.newArrayList( + private final ImmutableList aclSpec1 = ImmutableList.of( aclEntry(ACCESS, USER, FsAction.ALL), aclEntry(ACCESS, GROUP, FsAction.ALL), aclEntry(ACCESS, OTHER, FsAction.ALL), @@ -55,7 +56,7 @@ public static void 
setup() throws Exception { aclEntry(ACCESS, GROUP, "bar", FsAction.READ_WRITE), aclEntry(ACCESS, GROUP, "foo", FsAction.READ_EXECUTE)); - List aclSpec2 = Lists.newArrayList( + private final ImmutableList aclSpec2 = ImmutableList.of( aclEntry(ACCESS, USER, FsAction.ALL), aclEntry(ACCESS, GROUP, FsAction.ALL), aclEntry(ACCESS, OTHER, FsAction.READ_EXECUTE), @@ -83,20 +84,20 @@ public void verifyPermission(String locn, int permIndex) throws Exception { switch (permIndex) { case 0: FsPermission perm = fs.getFileStatus(new Path(locn)).getPermission(); - Assert.assertEquals(perm.toString(), "rwxrwxrwx"); + Assert.assertEquals("Location: " + locn, "rwxrwxrwx", String.valueOf(perm)); List actual = getAcl(locn); verifyAcls(aclSpec1, actual); break; case 1: perm = fs.getFileStatus(new Path(locn)).getPermission(); - Assert.assertEquals(perm.toString(), "rwxrwxr-x"); + Assert.assertEquals("Location: " + locn, "rwxrwxr-x", String.valueOf(perm)); List acls = getAcl(locn); verifyAcls(aclSpec2, acls); break; default: - throw new RuntimeException("Only 2 permissions by this test"); + throw new RuntimeException("Only 2 permissions by this test: " + permIndex); } } diff --git a/itests/hive-unit/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java b/itests/hive-unit/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java index b430cae..ffad413 100644 --- a/itests/hive-unit/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java +++ b/itests/hive-unit/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.shims.HadoopShims.MiniDFSShim; import org.apache.hadoop.hive.shims.HadoopShims.MiniMrShim; import org.apache.hadoop.hive.shims.ShimLoader; @@ -50,6 +51,7 @@ public static final String HS2_HTTP_MODE = "http"; private static final String driverName = "org.apache.hive.jdbc.HiveDriver"; private static final FsPermission FULL_PERM = new FsPermission((short)00777); + private static final FsPermission WRITE_ALL_PERM = new FsPermission((short)00733); private HiveServer2 hiveServer2 = null; private final File baseDir; private final Path baseDfsDir; @@ -200,9 +202,8 @@ private MiniHS2(HiveConf hiveConf, boolean useMiniMR, boolean useMiniKdc, hiveConf.setIntVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_PORT, getHttpPort()); Path scratchDir = new Path(baseDfsDir, "scratch"); - - // Create scratchdir with 777, so that user impersonation has no issues. - FileSystem.mkdirs(fs, scratchDir, FULL_PERM); + // Create root scratchdir with write all, so that user impersonation has no issues. 
+ Utilities.createDirsWithPermission(hiveConf, scratchDir, WRITE_ALL_PERM, true); System.setProperty(HiveConf.ConfVars.SCRATCHDIR.varname, scratchDir.toString()); hiveConf.setVar(ConfVars.SCRATCHDIR, scratchDir.toString()); diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/FolderPermissionBase.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/FolderPermissionBase.java index fb6d526..7f8ca87 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/FolderPermissionBase.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/FolderPermissionBase.java @@ -52,7 +52,7 @@ protected static Path warehouseDir; protected static Path baseDfsDir; - public static final PathFilter hiddenFileFilter = new PathFilter(){ + protected static final PathFilter hiddenFileFilter = new PathFilter(){ public boolean accept(Path p){ String name = p.getName(); return !name.startsWith("_") && !name.startsWith("."); @@ -591,7 +591,7 @@ private void assertExistence(String locn) throws Exception { private List listStatus(String locn) throws Exception { List results = new ArrayList(); - FileStatus[] listStatus = fs.listStatus(new Path(locn)); + FileStatus[] listStatus = fs.listStatus(new Path(locn), hiddenFileFilter); for (FileStatus status : listStatus) { results.add(status.getPath().toString()); } diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/authorization/plugin/TestHiveAuthorizerCheckInvocation.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/authorization/plugin/TestHiveAuthorizerCheckInvocation.java index 53d88b0..3c99068 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/authorization/plugin/TestHiveAuthorizerCheckInvocation.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/authorization/plugin/TestHiveAuthorizerCheckInvocation.java @@ -33,10 +33,13 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.CommandNeedRetryException; import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider; import org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator; @@ -52,10 +55,12 @@ * Test HiveAuthorizer api invocation */ public class TestHiveAuthorizerCheckInvocation { + private final Log LOG = LogFactory.getLog(this.getClass().getName());; protected static HiveConf conf; protected static Driver driver; private static final String tableName = TestHiveAuthorizerCheckInvocation.class.getSimpleName() + "Table"; + private static final String acidTableName = tableName + "_acid"; private static final String dbName = TestHiveAuthorizerCheckInvocation.class.getSimpleName() + "Db"; static HiveAuthorizer mockedAuthorizer; @@ -82,14 +87,18 @@ public static void beforeTest() throws Exception { conf.setVar(ConfVars.HIVE_AUTHORIZATION_MANAGER, MockedHiveAuthorizerFactory.class.getName()); conf.setVar(ConfVars.HIVE_AUTHENTICATOR_MANAGER, SessionStateUserAuthenticator.class.getName()); conf.setBoolVar(ConfVars.HIVE_AUTHORIZATION_ENABLED, true); - conf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, 
false); conf.setBoolVar(ConfVars.HIVE_SERVER2_ENABLE_DOAS, false); + conf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, true); + conf.setVar(ConfVars.HIVE_TXN_MANAGER, DbTxnManager.class.getName()); SessionState.start(conf); driver = new Driver(conf); runCmd("create table " + tableName + " (i int, j int, k string) partitioned by (city string, date string) "); runCmd("create database " + dbName); + // Need a separate table for ACID testing since it has to be bucketed and it has to be Acid + runCmd("create table " + acidTableName + " (i int, j int) clustered by (i) into 2 buckets " + + "stored as orc"); } private static void runCmd(String cmd) throws CommandNeedRetryException { @@ -99,6 +108,10 @@ private static void runCmd(String cmd) throws CommandNeedRetryException { @AfterClass public static void afterTests() throws Exception { + // Drop the tables when we're done. This makes the test work inside an IDE + runCmd("drop table if exists " + acidTableName); + runCmd("drop table if exists " + tableName); + runCmd("drop database if exists " + dbName); driver.close(); } @@ -244,6 +257,63 @@ public void testTempFunction() throws HiveAuthzPluginException, HiveAccessContro assertEquals("db name", null, funcObj.getDbname()); } + @Test + public void testUpdateSomeColumnsUsed() throws HiveAuthzPluginException, + HiveAccessControlException, CommandNeedRetryException { + reset(mockedAuthorizer); + int status = driver.compile("update " + acidTableName + " set i = 5 where j = 3"); + assertEquals(0, status); + + Pair, List> io = getHivePrivilegeObjectInputs(); + List outputs = io.getRight(); + HivePrivilegeObject tableObj = outputs.get(0); + LOG.debug("Got privilege object " + tableObj); + assertEquals("no of columns used", 1, tableObj.getColumns().size()); + assertEquals("Column used", "i", tableObj.getColumns().get(0)); + List inputs = io.getLeft(); + assertEquals(1, inputs.size()); + tableObj = inputs.get(0); + assertEquals(1, tableObj.getColumns().size()); + assertEquals("j", tableObj.getColumns().get(0)); + } + + @Test + public void testUpdateSomeColumnsUsedExprInSet() throws HiveAuthzPluginException, + HiveAccessControlException, CommandNeedRetryException { + reset(mockedAuthorizer); + int status = driver.compile("update " + acidTableName + " set i = 5, l = k where j = 3"); + assertEquals(0, status); + + Pair, List> io = getHivePrivilegeObjectInputs(); + List outputs = io.getRight(); + HivePrivilegeObject tableObj = outputs.get(0); + LOG.debug("Got privilege object " + tableObj); + assertEquals("no of columns used", 2, tableObj.getColumns().size()); + assertEquals("Columns used", Arrays.asList("i", "l"), + getSortedList(tableObj.getColumns())); + List inputs = io.getLeft(); + assertEquals(1, inputs.size()); + tableObj = inputs.get(0); + assertEquals(2, tableObj.getColumns().size()); + assertEquals("Columns used", Arrays.asList("j", "k"), + getSortedList(tableObj.getColumns())); + } + + @Test + public void testDelete() throws HiveAuthzPluginException, + HiveAccessControlException, CommandNeedRetryException { + reset(mockedAuthorizer); + int status = driver.compile("delete from " + acidTableName + " where j = 3"); + assertEquals(0, status); + + Pair, List> io = getHivePrivilegeObjectInputs(); + List inputs = io.getLeft(); + assertEquals(1, inputs.size()); + HivePrivilegeObject tableObj = inputs.get(0); + assertEquals(1, tableObj.getColumns().size()); + assertEquals("j", tableObj.getColumns().get(0)); + } + private void checkSingleTableInput(List inputs) { assertEquals("number of inputs", 1, 
inputs.size()); diff --git a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java index e1d44ec..1e66542 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java @@ -210,7 +210,7 @@ private void testScriptFile(String testName, String scriptText, String expectedP } scriptFile.delete(); } - + /** * Test that BeeLine will read comment lines that start with whitespace * @throws Throwable diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java index ae128a9..daf8e9e 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java @@ -262,10 +262,9 @@ public void testBadURL() throws Exception { private void checkBadUrl(String url) throws SQLException { try{ DriverManager.getConnection(url, "", ""); - fail("should have thrown IllegalArgumentException but did not "); - } catch(SQLException i) { - assertTrue(i.getMessage().contains("Bad URL format. Hostname not found " - + " in authority part of the url")); + fail("Should have thrown JdbcUriParseException but did not "); + } catch(JdbcUriParseException e) { + assertTrue(e.getMessage().contains("Bad URL format")); } } @@ -736,7 +735,7 @@ public void testDataTypes() throws Exception { assertTrue(res.next()); // skip the last (partitioning) column since it is always non-null for (int i = 1; i < meta.getColumnCount(); i++) { - assertNull(res.getObject(i)); + assertNull("Column " + i + " should be null", res.getObject(i)); } // getXXX returns 0 for numeric types, false for boolean and null for other assertEquals(0, res.getInt(1)); @@ -1618,6 +1617,10 @@ public void testResultSetMetaData() throws SQLException { // [url] [host] [port] [db] private static final String[][] URL_PROPERTIES = new String[][] { // binary mode + // For embedded mode, the JDBC uri is of the form: + // jdbc:hive2:///dbName;sess_var_list?hive_conf_list#hive_var_list + // and does not contain host:port string. + // As a result port is parsed to '-1' per the Java URI conventions {"jdbc:hive2://", "", "", "default"}, {"jdbc:hive2://localhost:10001/default", "localhost", "10001", "default"}, {"jdbc:hive2://localhost/notdefault", "localhost", "10000", "notdefault"}, @@ -1654,7 +1657,8 @@ public void testDriverProperties() throws SQLException { }; @Test - public void testParseUrlHttpMode() throws SQLException { + public void testParseUrlHttpMode() throws SQLException, JdbcUriParseException, + ZooKeeperHiveClientException { new HiveDriver(); for (String[] testValues : HTTP_URL_PROPERTIES) { JdbcConnectionParams params = Utils.parseURL(testValues[0]); diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java index 241d315..e0b6558 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java @@ -388,7 +388,7 @@ public void testSessionScratchDirs() throws Exception { } /** - * Tests the creation of the root hdfs scratch dir, which should be writable by all (777). 
+ * Tests the creation of the root hdfs scratch dir, which should be writable by all. * * @throws Exception */ @@ -410,7 +410,7 @@ public void testRootScratchDir() throws Exception { hs2Conn = getConnection(miniHS2.getJdbcURL(), userName, "password"); // FS FileSystem fs = miniHS2.getLocalFS(); - FsPermission expectedFSPermission = new FsPermission("777"); + FsPermission expectedFSPermission = new FsPermission((short)00733); // Verify scratch dir paths and permission // HDFS scratch dir scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR)); diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java index 78ea21d..4a60f52 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java @@ -103,6 +103,7 @@ public static final String UTF_8 = "UTF-8"; private static final Log LOG = LogFactory.getLog("QTestUtil"); + private static final String QTEST_LEAVE_FILES = "QTEST_LEAVE_FILES"; private final String defaultInitScript = "q_test_init.sql"; private final String defaultCleanupScript = "q_test_cleanup.sql"; @@ -537,6 +538,9 @@ public void clearPostTestEffects() throws Exception { * Clear out any side effects of running tests */ public void clearTestSideEffects() throws Exception { + if (System.getenv(QTEST_LEAVE_FILES) != null) { + return; + } // Delete any tables other than the source tables // and any databases other than the default database. for (String dbName : db.getAllDatabases()) { @@ -598,6 +602,9 @@ public void cleanUp() throws Exception { if(!isSessionStateStarted) { startSessionState(); } + if (System.getenv(QTEST_LEAVE_FILES) != null) { + return; + } SessionState.get().getConf().setBoolean("hive.test.shutdown.phase", true); diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/MetastoreAuthzAPIDisallowAuthorizer.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/MetastoreAuthzAPIDisallowAuthorizer.java new file mode 100644 index 0000000..35e856e --- /dev/null +++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/MetastoreAuthzAPIDisallowAuthorizer.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.security; + +import org.apache.hadoop.hive.ql.metadata.AuthorizationException; +import org.apache.hadoop.hive.ql.security.authorization.MetaStoreAuthzAPIAuthorizerEmbedOnly; + +/** + * Authorizer that prevents any authorization api call from being made. For use in testing. 
+ */ +public class MetastoreAuthzAPIDisallowAuthorizer extends MetaStoreAuthzAPIAuthorizerEmbedOnly { + public static final String errMsg = "Metastore Authorization api invocation is disabled" + + " in this configuration."; + + @Override + public void authorizeAuthorizationApiInvocation() throws AuthorizationException { + throw new AuthorizationException(errMsg); + } +} diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidatorForTest.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidatorForTest.java index 8edb253..ecd3d80 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidatorForTest.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidatorForTest.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveMetastoreClientFactory; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; @@ -38,8 +39,9 @@ public SQLStdHiveAuthorizationValidatorForTest(HiveMetastoreClientFactory metastoreClientFactory, HiveConf conf, HiveAuthenticationProvider authenticator, - SQLStdHiveAccessControllerWrapper privController) { - super(metastoreClientFactory, conf, authenticator, privController); + SQLStdHiveAccessControllerWrapper privController, HiveAuthzSessionContext ctx) + throws HiveAuthzPluginException { + super(metastoreClientFactory, conf, authenticator, privController, ctx); } @Override diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactoryForTest.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactoryForTest.java index bf00ae4..5140115 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactoryForTest.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactoryForTest.java @@ -37,7 +37,7 @@ public HiveAuthorizer createHiveAuthorizer(HiveMetastoreClientFactory metastoreC return new HiveAuthorizerImpl( privilegeManager, new SQLStdHiveAuthorizationValidatorForTest(metastoreClientFactory, conf, authenticator, - privilegeManager) + privilegeManager, ctx) ); } } diff --git a/jdbc/pom.xml b/jdbc/pom.xml index 1ad13a7..0132f12 100644 --- a/jdbc/pom.xml +++ b/jdbc/pom.xml @@ -80,6 +80,17 @@ libthrift ${libthrift.version} + + org.apache.zookeeper + zookeeper + ${zookeeper.version} + + + org.jboss.netty + netty + + + diff --git a/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java b/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java index cbcfec7..e0d2d6d 100644 --- a/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java +++ b/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java @@ -53,6 +53,7 @@ import org.apache.commons.logging.Log; import 
org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hive.jdbc.Utils.JdbcConnectionParams; import org.apache.hive.service.auth.HiveAuthFactory; import org.apache.hive.service.auth.KerberosSaslHelper; import org.apache.hive.service.auth.PlainSaslHelper; @@ -86,37 +87,20 @@ */ public class HiveConnection implements java.sql.Connection { public static final Log LOG = LogFactory.getLog(HiveConnection.class.getName()); - private static final String HIVE_AUTH_TYPE= "auth"; - private static final String HIVE_AUTH_QOP = "sasl.qop"; - private static final String HIVE_AUTH_SIMPLE = "noSasl"; - private static final String HIVE_AUTH_TOKEN = "delegationToken"; - private static final String HIVE_AUTH_USER = "user"; - private static final String HIVE_AUTH_PRINCIPAL = "principal"; - private static final String HIVE_AUTH_PASSWD = "password"; - private static final String HIVE_AUTH_KERBEROS_AUTH_TYPE = "kerberosAuthType"; - private static final String HIVE_AUTH_KERBEROS_AUTH_TYPE_FROM_SUBJECT = "fromSubject"; - private static final String HIVE_ANONYMOUS_USER = "anonymous"; - private static final String HIVE_ANONYMOUS_PASSWD = "anonymous"; - private static final String HIVE_USE_SSL = "ssl"; - private static final String HIVE_SSL_TRUST_STORE = "sslTrustStore"; - private static final String HIVE_SSL_TRUST_STORE_PASSWORD = "trustStorePassword"; - private static final String HIVE_SERVER2_TRANSPORT_MODE = "hive.server2.transport.mode"; - private static final String HIVE_SERVER2_THRIFT_HTTP_PATH = "hive.server2.thrift.http.path"; private static final String HIVE_VAR_PREFIX = "hivevar:"; private static final String HIVE_CONF_PREFIX = "hiveconf:"; - // Currently supports JKS keystore format - // See HIVE-6286 (Add support for PKCS12 keystore format) - private static final String HIVE_SSL_TRUST_STORE_TYPE = "JKS"; - private final String jdbcURI; - private final String host; - private final int port; + private String jdbcUriString; + private String host; + private int port; private final Map sessConfMap; private final Map hiveConfMap; private final Map hiveVarMap; + private JdbcConnectionParams connParams; private final boolean isEmbeddedMode; private TTransport transport; - private TCLIService.Iface client; // todo should be replaced by CliServiceClient + // TODO should be replaced by CliServiceClient + private TCLIService.Iface client; private boolean isClosed = true; private SQLWarning warningChain = null; private TSessionHandle sessHandle = null; @@ -126,14 +110,12 @@ public HiveConnection(String uri, Properties info) throws SQLException { setupLoginTimeout(); - jdbcURI = uri; - // parse the connection uri - Utils.JdbcConnectionParams connParams; try { connParams = Utils.parseURL(uri); - } catch (IllegalArgumentException e) { + } catch (ZooKeeperHiveClientException e) { throw new SQLException(e); } + jdbcUriString = connParams.getJdbcUriString(); // extract parsed connection parameters: // JDBC URL: jdbc:hive2://:/dbName;sess_var_list?hive_conf_list#hive_var_list // each list: =;= and so on @@ -164,14 +146,14 @@ public HiveConnection(String uri, Properties info) throws SQLException { } else { // extract user/password from JDBC connection properties if its not supplied in the // connection URL - if (info.containsKey(HIVE_AUTH_USER)) { - sessConfMap.put(HIVE_AUTH_USER, info.getProperty(HIVE_AUTH_USER)); - if (info.containsKey(HIVE_AUTH_PASSWD)) { - sessConfMap.put(HIVE_AUTH_PASSWD, info.getProperty(HIVE_AUTH_PASSWD)); + if 
(info.containsKey(JdbcConnectionParams.AUTH_USER)) { + sessConfMap.put(JdbcConnectionParams.AUTH_USER, info.getProperty(JdbcConnectionParams.AUTH_USER)); + if (info.containsKey(JdbcConnectionParams.AUTH_PASSWD)) { + sessConfMap.put(JdbcConnectionParams.AUTH_PASSWD, info.getProperty(JdbcConnectionParams.AUTH_PASSWD)); } } - if (info.containsKey(HIVE_AUTH_TYPE)) { - sessConfMap.put(HIVE_AUTH_TYPE, info.getProperty(HIVE_AUTH_TYPE)); + if (info.containsKey(JdbcConnectionParams.AUTH_TYPE)) { + sessConfMap.put(JdbcConnectionParams.AUTH_TYPE, info.getProperty(JdbcConnectionParams.AUTH_TYPE)); } // open the client transport openTransport(); @@ -189,19 +171,44 @@ public HiveConnection(String uri, Properties info) throws SQLException { supportedProtocols.add(TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V7); // open client session - openSession(connParams); + openSession(); } private void openTransport() throws SQLException { - // TODO: Refactor transport creation to a factory, it's getting uber messy here - transport = isHttpTransportMode() ? createHttpTransport() : createBinaryTransport(); - try { - if (!transport.isOpen()) { - transport.open(); + while (true) { + try { + transport = isHttpTransportMode() ? createHttpTransport() : createBinaryTransport(); + if (!transport.isOpen()) { + LOG.info("Will try to open client transport with JDBC Uri: " + jdbcUriString); + transport.open(); + } + break; + } catch (TTransportException e) { + LOG.info("Could not open client transport with JDBC Uri: " + jdbcUriString); + // We'll retry till we exhaust all HiveServer2 uris from ZooKeeper + if ((sessConfMap.get(JdbcConnectionParams.SERVICE_DISCOVERY_MODE) != null) + && (JdbcConnectionParams.SERVICE_DISCOVERY_MODE_ZOOKEEPER.equalsIgnoreCase(sessConfMap + .get(JdbcConnectionParams.SERVICE_DISCOVERY_MODE)))) { + try { + // Update jdbcUriString, host & port variables in connParams + // Throw an exception if all HiveServer2 uris have been exhausted, + // or if we're unable to connect to ZooKeeper. + Utils.updateConnParamsFromZooKeeper(connParams); + } catch (ZooKeeperHiveClientException ze) { + throw new SQLException( + "Could not open client transport for any of the Server URI's in ZooKeeper: " + + ze.getMessage(), " 08S01", ze); + } + // Update with new values + jdbcUriString = connParams.getJdbcUriString(); + host = connParams.getHost(); + port = connParams.getPort(); + LOG.info("Will retry opening client transport"); + } else { + throw new SQLException("Could not open client transport with JDBC Uri: " + jdbcUriString + + ": " + e.getMessage(), " 08S01", e); + } } - } catch (TTransportException e) { - throw new SQLException("Could not open connection to " - + jdbcURI + ": " + e.getMessage(), " 08S01", e); } } @@ -211,37 +218,36 @@ private String getServerHttpUrl(boolean useSsl) { String schemeName = useSsl ? 
"https" : "http"; // http path should begin with "/" String httpPath; - httpPath = hiveConfMap.get(HIVE_SERVER2_THRIFT_HTTP_PATH); - if(httpPath == null) { + httpPath = hiveConfMap.get(JdbcConnectionParams.HTTP_PATH); + if (httpPath == null) { httpPath = "/"; - } - else if(!httpPath.startsWith("/")) { + } else if (!httpPath.startsWith("/")) { httpPath = "/" + httpPath; } - return schemeName + "://" + host + ":" + port + httpPath; + return schemeName + "://" + host + ":" + port + httpPath; } - private TTransport createHttpTransport() throws SQLException { + private TTransport createHttpTransport() throws SQLException, TTransportException { DefaultHttpClient httpClient; - boolean useSsl = isSslConnection(); - // Create an http client from the configs - try { - httpClient = getHttpClient(useSsl); - } catch (Exception e) { - String msg = "Could not create http connection to " + - jdbcURI + ". " + e.getMessage(); - throw new SQLException(msg, " 08S01", e); - } - + httpClient = getHttpClient(useSsl); try { transport = new THttpClient(getServerHttpUrl(useSsl), httpClient); + // We'll call an open/close here to send a test HTTP message to the server. Any + // TTransportException caused by trying to connect to a non-available peer are thrown here. + // Bubbling them up the call hierarchy so that a retry can happen in openTransport, + // if dynamic service discovery is configured. + TCLIService.Iface client = new TCLIService.Client(new TBinaryProtocol(transport)); + TOpenSessionResp openResp = client.OpenSession(new TOpenSessionReq()); + if (openResp != null) { + client.CloseSession(new TCloseSessionReq(openResp.getSessionHandle())); + } } - catch (TTransportException e) { + catch (TException e) { String msg = "Could not create http connection to " + - jdbcURI + ". " + e.getMessage(); - throw new SQLException(msg, " 08S01", e); + jdbcUriString + ". " + e.getMessage(); + throw new TTransportException(msg, e); } return transport; } @@ -263,7 +269,7 @@ private DefaultHttpClient getHttpClient(Boolean useSsl) throws SQLException { * for sending to the server before every request. */ requestInterceptor = new HttpKerberosRequestInterceptor( - sessConfMap.get(HIVE_AUTH_PRINCIPAL), host, getServerHttpUrl(false)); + sessConfMap.get(JdbcConnectionParams.AUTH_PRINCIPAL), host, getServerHttpUrl(false)); } else { /** @@ -273,11 +279,23 @@ private DefaultHttpClient getHttpClient(Boolean useSsl) throws SQLException { requestInterceptor = new HttpBasicAuthInterceptor(getUserName(), getPassword()); // Configure httpClient for SSL if (useSsl) { - String sslTrustStorePath = sessConfMap.get(HIVE_SSL_TRUST_STORE); + String sslTrustStorePath = sessConfMap.get(JdbcConnectionParams.SSL_TRUST_STORE); String sslTrustStorePassword = sessConfMap.get( - HIVE_SSL_TRUST_STORE_PASSWORD); + JdbcConnectionParams.SSL_TRUST_STORE_PASSWORD); KeyStore sslTrustStore; SSLSocketFactory socketFactory; + /** + * The code within the try block throws: + * 1. SSLInitializationException + * 2. KeyStoreException + * 3. IOException + * 4. NoSuchAlgorithmException + * 5. CertificateException + * 6. KeyManagementException + * 7. UnrecoverableKeyException + * We don't want the client to retry on any of these, hence we catch all + * and throw a SQLException. 
+ */ try { if (sslTrustStorePath == null || sslTrustStorePath.isEmpty()) { // Create a default socket factory based on standard JSSE trust material @@ -285,7 +303,7 @@ private DefaultHttpClient getHttpClient(Boolean useSsl) throws SQLException { } else { // Pick trust store config from the given path - sslTrustStore = KeyStore.getInstance(HIVE_SSL_TRUST_STORE_TYPE); + sslTrustStore = KeyStore.getInstance(JdbcConnectionParams.SSL_TRUST_STORE_TYPE); sslTrustStore.load(new FileInputStream(sslTrustStorePath), sslTrustStorePassword.toCharArray()); socketFactory = new SSLSocketFactory(sslTrustStore); @@ -296,7 +314,7 @@ private DefaultHttpClient getHttpClient(Boolean useSsl) throws SQLException { } catch (Exception e) { String msg = "Could not create an https connection to " + - jdbcURI + ". " + e.getMessage(); + jdbcUriString + ". " + e.getMessage(); throw new SQLException(msg, " 08S01", e); } } @@ -316,29 +334,32 @@ private DefaultHttpClient getHttpClient(Boolean useSsl) throws SQLException { * - Raw (non-SASL) socket * * Kerberos and Delegation token supports SASL QOP configurations + * @throws SQLException, TTransportException */ - private TTransport createBinaryTransport() throws SQLException { + private TTransport createBinaryTransport() throws SQLException, TTransportException { try { // handle secure connection if specified - if (!HIVE_AUTH_SIMPLE.equals(sessConfMap.get(HIVE_AUTH_TYPE))) { + if (!JdbcConnectionParams.AUTH_SIMPLE.equals(sessConfMap.get(JdbcConnectionParams.AUTH_TYPE))) { // If Kerberos Map saslProps = new HashMap(); SaslQOP saslQOP = SaslQOP.AUTH; - if (sessConfMap.containsKey(HIVE_AUTH_PRINCIPAL)) { - if (sessConfMap.containsKey(HIVE_AUTH_QOP)) { + if (sessConfMap.containsKey(JdbcConnectionParams.AUTH_PRINCIPAL)) { + if (sessConfMap.containsKey(JdbcConnectionParams.AUTH_QOP)) { try { - saslQOP = SaslQOP.fromString(sessConfMap.get(HIVE_AUTH_QOP)); + saslQOP = SaslQOP.fromString(sessConfMap.get(JdbcConnectionParams.AUTH_QOP)); } catch (IllegalArgumentException e) { - throw new SQLException("Invalid " + HIVE_AUTH_QOP + + throw new SQLException("Invalid " + JdbcConnectionParams.AUTH_QOP + " parameter. " + e.getMessage(), "42000", e); } } saslProps.put(Sasl.QOP, saslQOP.toString()); saslProps.put(Sasl.SERVER_AUTH, "true"); - boolean assumeSubject = HIVE_AUTH_KERBEROS_AUTH_TYPE_FROM_SUBJECT.equals(sessConfMap.get(HIVE_AUTH_KERBEROS_AUTH_TYPE)); + boolean assumeSubject = JdbcConnectionParams.AUTH_KERBEROS_AUTH_TYPE_FROM_SUBJECT.equals(sessConfMap + .get(JdbcConnectionParams.AUTH_KERBEROS_AUTH_TYPE)); transport = KerberosSaslHelper.getKerberosTransport( - sessConfMap.get(HIVE_AUTH_PRINCIPAL), host, - HiveAuthFactory.getSocketTransport(host, port, loginTimeout), saslProps, assumeSubject); + sessConfMap.get(JdbcConnectionParams.AUTH_PRINCIPAL), host, + HiveAuthFactory.getSocketTransport(host, port, loginTimeout), saslProps, + assumeSubject); } else { // If there's a delegation token available then use token based connection String tokenStr = getClientDelegationToken(sessConfMap); @@ -349,10 +370,15 @@ private TTransport createBinaryTransport() throws SQLException { // we are using PLAIN Sasl connection with user/password String userName = getUserName(); String passwd = getPassword(); + // Note: Thrift returns an SSL socket that is already bound to the specified host:port + // Therefore an open called on this would be a no-op later + // Hence, any TTransportException related to connecting with the peer are thrown here. 
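
The comments added in createHttpTransport and createBinaryTransport explain that connection-level TTransportExceptions are deliberately allowed to bubble up so that openTransport() can retry against another HiveServer2 instance when ZooKeeper discovery is configured. A simplified, hypothetical sketch of that retry-over-candidates loop (openOnce and TransportFailure are illustrative stand-ins for the Thrift calls and TTransportException, not Hive APIs):

    import java.util.Arrays;
    import java.util.Iterator;
    import java.util.List;

    public class RetryOverCandidatesSketch {
      /** Stand-in for a retryable, transport-level failure such as TTransportException. */
      static class TransportFailure extends Exception {
        TransportFailure(String msg) { super(msg); }
      }

      /** Try each candidate host:port in turn; give up only when all are exhausted. */
      static String connectToFirstReachable(List<String> candidates) throws Exception {
        for (Iterator<String> it = candidates.iterator(); it.hasNext(); ) {
          String hostPort = it.next();
          try {
            openOnce(hostPort);           // may throw TransportFailure
            return hostPort;              // success: stop retrying
          } catch (TransportFailure e) {
            // Mirrors the "update connection params from ZooKeeper and retry" branch
            System.out.println("Could not open client transport to " + hostPort + ", retrying");
          }
        }
        throw new Exception("Could not open client transport for any of the server URIs");
      }

      /** Hypothetical single connection attempt. */
      static void openOnce(String hostPort) throws TransportFailure {
        if (!hostPort.startsWith("good")) {
          throw new TransportFailure("refused: " + hostPort);
        }
      }

      public static void main(String[] args) throws Exception {
        System.out.println(connectToFirstReachable(
            Arrays.asList("bad-host:10000", "good-host:10000")));
      }
    }
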
+ // Bubbling them up the call hierarchy so that a retry can happen in openTransport, + // if dynamic service discovery is configured. if (isSslConnection()) { // get SSL socket - String sslTrustStore = sessConfMap.get(HIVE_SSL_TRUST_STORE); - String sslTrustStorePassword = sessConfMap.get(HIVE_SSL_TRUST_STORE_PASSWORD); + String sslTrustStore = sessConfMap.get(JdbcConnectionParams.SSL_TRUST_STORE); + String sslTrustStorePassword = sessConfMap.get(JdbcConnectionParams.SSL_TRUST_STORE_PASSWORD); if (sslTrustStore == null || sslTrustStore.isEmpty()) { transport = HiveAuthFactory.getSSLSocket(host, port, loginTimeout); } else { @@ -373,10 +399,7 @@ private TTransport createBinaryTransport() throws SQLException { } } catch (SaslException e) { throw new SQLException("Could not create secure connection to " - + jdbcURI + ": " + e.getMessage(), " 08S01", e); - } catch (TTransportException e) { - throw new SQLException("Could not create connection to " - + jdbcURI + ": " + e.getMessage(), " 08S01", e); + + jdbcUriString + ": " + e.getMessage(), " 08S01", e); } return transport; } @@ -385,7 +408,7 @@ private TTransport createBinaryTransport() throws SQLException { private String getClientDelegationToken(Map jdbcConnConf) throws SQLException { String tokenStr = null; - if (HIVE_AUTH_TOKEN.equalsIgnoreCase(jdbcConnConf.get(HIVE_AUTH_TYPE))) { + if (JdbcConnectionParams.AUTH_TOKEN.equalsIgnoreCase(jdbcConnConf.get(JdbcConnectionParams.AUTH_TYPE))) { // check delegation token in job conf if any try { tokenStr = ShimLoader.getHadoopShims(). @@ -397,7 +420,7 @@ private String getClientDelegationToken(Map jdbcConnConf) return tokenStr; } - private void openSession(Utils.JdbcConnectionParams connParams) throws SQLException { + private void openSession() throws SQLException { TOpenSessionReq openReq = new TOpenSessionReq(); Map openConf = new HashMap(); @@ -433,7 +456,7 @@ private void openSession(Utils.JdbcConnectionParams connParams) throws SQLExcept } catch (TException e) { LOG.error("Error opening session", e); throw new SQLException("Could not establish connection to " - + jdbcURI + ": " + e.getMessage(), " 08S01", e); + + jdbcUriString + ": " + e.getMessage(), " 08S01", e); } isClosed = false; } @@ -442,27 +465,27 @@ private void openSession(Utils.JdbcConnectionParams connParams) throws SQLExcept * @return username from sessConfMap */ private String getUserName() { - return getSessionValue(HIVE_AUTH_USER, HIVE_ANONYMOUS_USER); + return getSessionValue(JdbcConnectionParams.AUTH_USER, JdbcConnectionParams.ANONYMOUS_USER); } /** * @return password from sessConfMap */ private String getPassword() { - return getSessionValue(HIVE_AUTH_PASSWD, HIVE_ANONYMOUS_PASSWD); + return getSessionValue(JdbcConnectionParams.AUTH_PASSWD, JdbcConnectionParams.ANONYMOUS_PASSWD); } private boolean isSslConnection() { - return "true".equalsIgnoreCase(sessConfMap.get(HIVE_USE_SSL)); + return "true".equalsIgnoreCase(sessConfMap.get(JdbcConnectionParams.USE_SSL)); } private boolean isKerberosAuthMode() { - return !HIVE_AUTH_SIMPLE.equals(sessConfMap.get(HIVE_AUTH_TYPE)) - && sessConfMap.containsKey(HIVE_AUTH_PRINCIPAL); + return !JdbcConnectionParams.AUTH_SIMPLE.equals(sessConfMap.get(JdbcConnectionParams.AUTH_TYPE)) + && sessConfMap.containsKey(JdbcConnectionParams.AUTH_PRINCIPAL); } private boolean isHttpTransportMode() { - String transportMode = hiveConfMap.get(HIVE_SERVER2_TRANSPORT_MODE); + String transportMode = hiveConfMap.get(JdbcConnectionParams.TRANSPORT_MODE); if(transportMode != null && 
(transportMode.equalsIgnoreCase("http"))) { return true; } diff --git a/jdbc/src/java/org/apache/hive/jdbc/HiveDriver.java b/jdbc/src/java/org/apache/hive/jdbc/HiveDriver.java index 6e248d6..396c314 100644 --- a/jdbc/src/java/org/apache/hive/jdbc/HiveDriver.java +++ b/jdbc/src/java/org/apache/hive/jdbc/HiveDriver.java @@ -230,7 +230,12 @@ private Properties parseURLforPropertyInfo(String url, Properties defaults) thro throw new SQLException("Invalid connection url: " + url); } - JdbcConnectionParams params = Utils.parseURL(url); + JdbcConnectionParams params = null; + try { + params = Utils.parseURL(url); + } catch (ZooKeeperHiveClientException e) { + throw new SQLException(e); + } String host = params.getHost(); if (host == null){ host = ""; @@ -239,7 +244,7 @@ private Properties parseURLforPropertyInfo(String url, Properties defaults) thro if(host.equals("")){ port = ""; } - else if(port.equals("0")){ + else if(port.equals("0") || port.equals("-1")){ port = Utils.DEFAULT_PORT; } String db = params.getDbName(); diff --git a/jdbc/src/java/org/apache/hive/jdbc/JdbcUriParseException.java b/jdbc/src/java/org/apache/hive/jdbc/JdbcUriParseException.java new file mode 100644 index 0000000..6bb2e20 --- /dev/null +++ b/jdbc/src/java/org/apache/hive/jdbc/JdbcUriParseException.java @@ -0,0 +1,45 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hive.jdbc; + +import java.sql.SQLException; + +public class JdbcUriParseException extends SQLException { + + private static final long serialVersionUID = 0; + + /** + * @param cause (original exception) + */ + public JdbcUriParseException(Throwable cause) { + super(cause); + } + + /** + * @param msg (exception message) + */ + public JdbcUriParseException(String msg) { + super(msg); + } + + /** + * @param msg (exception message) + * @param cause (original exception) + */ + public JdbcUriParseException(String msg, Throwable cause) { + super(msg, cause); + } + +} diff --git a/jdbc/src/java/org/apache/hive/jdbc/Utils.java b/jdbc/src/java/org/apache/hive/jdbc/Utils.java index 58339bf..e6b1a36 100644 --- a/jdbc/src/java/org/apache/hive/jdbc/Utils.java +++ b/jdbc/src/java/org/apache/hive/jdbc/Utils.java @@ -19,17 +19,23 @@ package org.apache.hive.jdbc; import java.net.URI; +import java.net.URISyntaxException; import java.sql.SQLException; +import java.util.ArrayList; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hive.service.cli.HiveSQLException; import org.apache.hive.service.cli.thrift.TStatus; import org.apache.hive.service.cli.thrift.TStatusCode; public class Utils { + public static final Log LOG = LogFactory.getLog(Utils.class.getName()); /** * The required prefix for the connection URL. 
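
Because the new JdbcUriParseException above subclasses SQLException, existing catch (SQLException) blocks keep working while tests such as checkBadUrl can assert on the narrower type. A small self-contained sketch of that catch ordering (BadUrlException is a hypothetical stand-in with the same shape):

    import java.sql.SQLException;

    public class CatchOrderingSketch {
      /** Hypothetical stand-in shaped like JdbcUriParseException. */
      static class BadUrlException extends SQLException {
        BadUrlException(String msg) { super(msg); }
      }

      static void parse(String url) throws SQLException {
        if (!url.startsWith("jdbc:hive2://")) {
          throw new BadUrlException("Bad URL format: Missing prefix jdbc:hive2://");
        }
      }

      public static void main(String[] args) {
        try {
          parse("jdbc:mysql://localhost/db");
        } catch (BadUrlException e) {
          // The more specific type is caught first...
          System.out.println("parse error: " + e.getMessage());
        } catch (SQLException e) {
          // ...while existing, broader handlers still compile and still apply.
          System.out.println("other SQL error: " + e.getMessage());
        }
      }
    }
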
*/ @@ -47,14 +53,58 @@ private static final String URI_JDBC_PREFIX = "jdbc:"; + private static final String URI_HIVE_PREFIX = "hive2:"; + public static class JdbcConnectionParams { + // Note on client side parameter naming convention: + // Prefer using a shorter camelCase param name instead of using the same name as the + // corresponding + // HiveServer2 config. + // For a jdbc url: jdbc:hive2://:/dbName;sess_var_list?hive_conf_list#hive_var_list, + // client side params are specified in sess_var_list + + // Client param names: + static final String AUTH_TYPE = "auth"; + static final String AUTH_QOP = "sasl.qop"; + static final String AUTH_SIMPLE = "noSasl"; + static final String AUTH_TOKEN = "delegationToken"; + static final String AUTH_USER = "user"; + static final String AUTH_PRINCIPAL = "principal"; + static final String AUTH_PASSWD = "password"; + static final String AUTH_KERBEROS_AUTH_TYPE = "kerberosAuthType"; + static final String AUTH_KERBEROS_AUTH_TYPE_FROM_SUBJECT = "fromSubject"; + static final String ANONYMOUS_USER = "anonymous"; + static final String ANONYMOUS_PASSWD = "anonymous"; + static final String USE_SSL = "ssl"; + static final String SSL_TRUST_STORE = "sslTrustStore"; + static final String SSL_TRUST_STORE_PASSWORD = "trustStorePassword"; + static final String TRANSPORT_MODE = "hive.server2.transport.mode"; + static final String HTTP_PATH = "hive.server2.thrift.http.path"; + static final String SERVICE_DISCOVERY_MODE = "serviceDiscoveryMode"; + // Don't use dynamic serice discovery + static final String SERVICE_DISCOVERY_MODE_NONE = "none"; + // Use ZooKeeper for indirection while using dynamic service discovery + static final String SERVICE_DISCOVERY_MODE_ZOOKEEPER = "zooKeeper"; + static final String ZOOKEEPER_NAMESPACE = "zooKeeperNamespace"; + + // Non-configurable params: + // ZOOKEEPER_SESSION_TIMEOUT is not exposed as client configurable + static final int ZOOKEEPER_SESSION_TIMEOUT = 600 * 1000; + // Currently supports JKS keystore format + static final String SSL_TRUST_STORE_TYPE = "JKS"; + private String host = null; private int port; + private String jdbcUriString; private String dbName = DEFAULT_DATABASE; private Map hiveConfs = new LinkedHashMap(); private Map hiveVars = new LinkedHashMap(); private Map sessionVars = new LinkedHashMap(); private boolean isEmbeddedMode = false; + private String[] authorityList; + private String zooKeeperEnsemble = null; + private String currentHostZnodePath; + private List rejectedHostZnodePaths = new ArrayList(); public JdbcConnectionParams() { } @@ -62,46 +112,94 @@ public JdbcConnectionParams() { public String getHost() { return host; } + public int getPort() { return port; } + + public String getJdbcUriString() { + return jdbcUriString; + } + public String getDbName() { return dbName; } + public Map getHiveConfs() { return hiveConfs; } - public Map getHiveVars() { + + public Map getHiveVars() { return hiveVars; } + public boolean isEmbeddedMode() { return isEmbeddedMode; } + public Map getSessionVars() { return sessionVars; } + public String[] getAuthorityList() { + return authorityList; + } + + public String getZooKeeperEnsemble() { + return zooKeeperEnsemble; + } + + public List getRejectedHostZnodePaths() { + return rejectedHostZnodePaths; + } + + public String getCurrentHostZnodePath() { + return currentHostZnodePath; + } + public void setHost(String host) { this.host = host; } + public void setPort(int port) { this.port = port; } + + public void setJdbcUriString(String jdbcUriString) { + this.jdbcUriString = 
jdbcUriString; + } + public void setDbName(String dbName) { this.dbName = dbName; } + public void setHiveConfs(Map hiveConfs) { this.hiveConfs = hiveConfs; } - public void setHiveVars(Map hiveVars) { + + public void setHiveVars(Map hiveVars) { this.hiveVars = hiveVars; } + public void setEmbeddedMode(boolean embeddedMode) { this.isEmbeddedMode = embeddedMode; } + public void setSessionVars(Map sessionVars) { this.sessionVars = sessionVars; } + + public void setSuppliedAuthorityList(String[] authorityList) { + this.authorityList = authorityList; + } + + public void setZooKeeperEnsemble(String zooKeeperEnsemble) { + this.zooKeeperEnsemble = zooKeeperEnsemble; + } + + public void setCurrentHostZnodePath(String currentHostZnodePath) { + this.currentHostZnodePath = currentHostZnodePath; + } } // Verify success or success_with_info status, else throw SQLException @@ -124,27 +222,33 @@ public static void verifySuccess(TStatus status, boolean withInfo) throws SQLExc /** * Parse JDBC connection URL - * The new format of the URL is jdbc:hive2://:/dbName;sess_var_list?hive_conf_list#hive_var_list - * where the optional sess, conf and var lists are semicolon separated = pairs. As before, if the - * host/port is not specified, it the driver runs an embedded hive. + * The new format of the URL is: + * jdbc:hive2://:,:/dbName;sess_var_list?hive_conf_list#hive_var_list + * where the optional sess, conf and var lists are semicolon separated = pairs. + * For utilizing dynamic service discovery with HiveServer2 multiple comma separated host:port pairs can + * be specified as shown above. + * The JDBC driver resolves the list of uris and picks a specific server instance to connect to. + * Currently, dynamic service discovery using ZooKeeper is supported, in which case the host:port pairs represent a ZooKeeper ensemble. + * + * As before, if the host/port is not specified, it the driver runs an embedded hive. * examples - * jdbc:hive2://ubuntu:11000/db2?hive.cli.conf.printheader=true;hive.exec.mode.local.auto.inputbytes.max=9999#stab=salesTable;icol=customerID * jdbc:hive2://?hive.cli.conf.printheader=true;hive.exec.mode.local.auto.inputbytes.max=9999#stab=salesTable;icol=customerID * jdbc:hive2://ubuntu:11000/db2;user=foo;password=bar * * Connect to http://server:10001/hs2, with specified basicAuth credentials and initial database: - * jdbc:hive2://server:10001/db;user=foo;password=bar?hive.server2.transport.mode=http;hive.server2.thrift.http.path=hs2 - * - * Note that currently the session properties are not used. + * jdbc:hive2://server:10001/db;user=foo;password=bar?hive.server2.transport.mode=http;hive.server2.thrift.http.path=hs2 * * @param uri * @return + * @throws SQLException */ - public static JdbcConnectionParams parseURL(String uri) throws IllegalArgumentException { + public static JdbcConnectionParams parseURL(String uri) throws JdbcUriParseException, + SQLException, ZooKeeperHiveClientException { JdbcConnectionParams connParams = new JdbcConnectionParams(); if (!uri.startsWith(URL_PREFIX)) { - throw new IllegalArgumentException("Bad URL format: Missing prefix " + URL_PREFIX); + throw new JdbcUriParseException("Bad URL format: Missing prefix " + URL_PREFIX); } // For URLs with no other configuration @@ -154,29 +258,28 @@ public static JdbcConnectionParams parseURL(String uri) throws IllegalArgumentEx return connParams; } - URI jdbcURI = URI.create(uri.substring(URI_JDBC_PREFIX.length())); - - // Check to prevent unintentional use of embedded mode. 
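
Tying the new session variables to the URL format documented above: a client opting into ZooKeeper-based discovery lists the ensemble where a single HiveServer2 host:port used to go, and passes serviceDiscoveryMode and zooKeeperNamespace in the sess_var_list. A usage sketch (host names, credentials and the namespace value are placeholders; the zooKeeperNamespace value has to match whatever namespace the HiveServer2 instances register under):

    import java.sql.Connection;
    import java.sql.DriverManager;

    public class ZkDiscoveryConnectSketch {
      public static void main(String[] args) throws Exception {
        // Older driver versions may need explicit registration
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        // Comma-separated ZooKeeper ensemble in place of a single host:port;
        // serviceDiscoveryMode and zooKeeperNamespace ride in the sess_var_list.
        String url = "jdbc:hive2://zk1:2181,zk2:2181,zk3:2181/default;"
            + "serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2";
        try (Connection conn = DriverManager.getConnection(url, "hive", "")) {
          System.out.println("Connected: " + !conn.isClosed());
        }
      }
    }
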
A missing "/" - // to separate the 'path' portion of URI can result in this. - // The missing "/" common typo while using secure mode, eg of such url - - // jdbc:hive2://localhost:10000;principal=hive/HiveServer2Host@YOUR-REALM.COM - if((jdbcURI.getAuthority() != null) && (jdbcURI.getHost()==null)) { - throw new IllegalArgumentException("Bad URL format. Hostname not found " - + " in authority part of the url: " + jdbcURI.getAuthority() - + ". Are you missing a '/' after the hostname ?"); - } - - connParams.setHost(jdbcURI.getHost()); - if (connParams.getHost() == null) { + // The JDBC URI now supports specifying multiple host:port if dynamic service discovery is + // configured on HiveServer2 (like: host1:port1,host2:port2,host3:port3) + // We'll extract the authorities (host:port combo) from the URI, extract session vars, hive + // confs & hive vars by parsing it as a Java URI. + // To parse the intermediate URI as a Java URI, we'll give a dummy authority(dummy:00000). + // Later, we'll substitute the dummy authority for a resolved authority. + String dummyAuthorityString = "dummyhost:00000"; + String suppliedAuthorities = getAuthorities(uri, connParams); + if ((suppliedAuthorities == null) || (suppliedAuthorities.isEmpty())) { + // Given uri of the form: + // jdbc:hive2:///dbName;sess_var_list?hive_conf_list#hive_var_list connParams.setEmbeddedMode(true); } else { - int port = jdbcURI.getPort(); - if (port == -1) { - port = Integer.valueOf(DEFAULT_PORT); - } - connParams.setPort(port); + LOG.info("Supplied authorities: " + suppliedAuthorities); + String[] authorityList = suppliedAuthorities.split(","); + connParams.setSuppliedAuthorityList(authorityList); + uri = uri.replace(suppliedAuthorities, dummyAuthorityString); } + // Now parse the connection uri with dummy authority + URI jdbcURI = URI.create(uri.substring(URI_JDBC_PREFIX.length())); + // key=value pattern Pattern pattern = Pattern.compile("([^;]*)=([^;]*)[;]?"); @@ -192,12 +295,13 @@ public static JdbcConnectionParams parseURL(String uri) throws IllegalArgumentEx } else { // we have dbname followed by session parameters dbName = sessVars.substring(0, sessVars.indexOf(';')); - sessVars = sessVars.substring(sessVars.indexOf(';')+1); + sessVars = sessVars.substring(sessVars.indexOf(';') + 1); if (sessVars != null) { Matcher sessMatcher = pattern.matcher(sessVars); while (sessMatcher.find()) { if (connParams.getSessionVars().put(sessMatcher.group(1), sessMatcher.group(2)) != null) { - throw new IllegalArgumentException("Bad URL format: Multiple values for property " + sessMatcher.group(1)); + throw new JdbcUriParseException("Bad URL format: Multiple values for property " + + sessMatcher.group(1)); } } } @@ -225,10 +329,146 @@ public static JdbcConnectionParams parseURL(String uri) throws IllegalArgumentEx } } + // Extract host, port + if (connParams.isEmbeddedMode()) { + // In case of embedded mode we were supplied with an empty authority. + // So we never substituted the authority with a dummy one. + connParams.setHost(jdbcURI.getHost()); + connParams.setPort(jdbcURI.getPort()); + } else { + // Else substitute the dummy authority with a resolved one. 
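
The dummyhost:00000 substitution above works around a java.net.URI limitation: a comma-separated authority is accepted, but neither host nor port can be read back from it, which is the same "authority present, host null" state the old code treated as a bad URL. A JDK-only sketch of the behavior that motivates the workaround:

    import java.net.URI;

    public class MultiHostAuthorityDemo {
      public static void main(String[] args) {
        URI multi = URI.create("hive2://host1:10000,host2:10000,host3:10000/default");
        // The authority is kept as-is, but it is not a parseable host:port pair
        System.out.println(multi.getAuthority()); // host1:10000,host2:10000,host3:10000
        System.out.println(multi.getHost());      // null
        System.out.println(multi.getPort());      // -1

        // With a single well-formed authority, host and port come back as expected
        URI single = URI.create("hive2://dummyhost:00000/default");
        System.out.println(single.getHost() + ":" + single.getPort()); // dummyhost:0
      }
    }
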
+ // In case of dynamic service discovery using ZooKeeper, it picks a server uri from ZooKeeper + String resolvedAuthorityString = resolveAuthority(connParams); + uri = uri.replace(dummyAuthorityString, resolvedAuthorityString); + connParams.setJdbcUriString(uri); + // Create a Java URI from the resolved URI for extracting the host/port + URI resolvedAuthorityURI = null; + try { + resolvedAuthorityURI = new URI(null, resolvedAuthorityString, null, null, null); + } catch (URISyntaxException e) { + throw new JdbcUriParseException("Bad URL format: ", e); + } + connParams.setHost(resolvedAuthorityURI.getHost()); + connParams.setPort(resolvedAuthorityURI.getPort()); + } + return connParams; } /** + * Get the authority string from the supplied uri, which could potentially contain multiple + * host:port pairs. + * + * @param uri + * @param connParams + * @return + * @throws JdbcUriParseException + */ + private static String getAuthorities(String uri, JdbcConnectionParams connParams) + throws JdbcUriParseException { + String authorities; + // For a jdbc uri like: jdbc:hive2://host1:port1,host2:port2,host3:port3/ + // Extract the uri host:port list starting after "jdbc:hive2://", till the 1st "/" or EOL + int fromIndex = Utils.URL_PREFIX.length(); + int toIndex = uri.indexOf("/", fromIndex); + if (toIndex < 0) { + authorities = uri.substring(fromIndex); + } else { + authorities = uri.substring(fromIndex, uri.indexOf("/", fromIndex)); + } + return authorities; + } + + /** + * Get a string representing a specific host:port + * @param connParams + * @return + * @throws JdbcUriParseException + * @throws ZooKeeperHiveClientException + */ + private static String resolveAuthority(JdbcConnectionParams connParams) + throws JdbcUriParseException, ZooKeeperHiveClientException { + String serviceDiscoveryMode = + connParams.getSessionVars().get(JdbcConnectionParams.SERVICE_DISCOVERY_MODE); + if ((serviceDiscoveryMode != null) + && (JdbcConnectionParams.SERVICE_DISCOVERY_MODE_ZOOKEEPER + .equalsIgnoreCase(serviceDiscoveryMode))) { + // Resolve using ZooKeeper + return resolveAuthorityUsingZooKeeper(connParams); + } else { + String authority = connParams.getAuthorityList()[0]; + URI jdbcURI = URI.create(URI_HIVE_PREFIX + "//" + authority); + // Check to prevent unintentional use of embedded mode. A missing "/" + // to separate the 'path' portion of URI can result in this. + // The missing "/" common typo while using secure mode, eg of such url - + // jdbc:hive2://localhost:10000;principal=hive/HiveServer2Host@YOUR-REALM.COM + if ((jdbcURI.getAuthority() != null) && (jdbcURI.getHost() == null)) { + throw new JdbcUriParseException("Bad URL format. Hostname not found " + + " in authority part of the url: " + jdbcURI.getAuthority() + + ". Are you missing a '/' after the hostname ?"); + } + // Return the 1st element of the array + return jdbcURI.getAuthority(); + } + } + + /** + * Read a specific host:port from ZooKeeper + * @param connParams + * @return + * @throws ZooKeeperHiveClientException + */ + private static String resolveAuthorityUsingZooKeeper(JdbcConnectionParams connParams) + throws ZooKeeperHiveClientException { + // Set ZooKeeper ensemble in connParams for later use + connParams.setZooKeeperEnsemble(joinStringArray(connParams.getAuthorityList(), ",")); + return ZooKeeperHiveClientHelper.getNextServerUriFromZooKeeper(connParams); + } + + /** + * Read the next server coordinates (host:port combo) from ZooKeeper. Ignore the znodes already + * explored. 
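
getAuthorities above is a plain substring scan between the jdbc:hive2:// prefix and the first '/'. An equivalent standalone sketch, useful for eyeballing the boundary cases:

    public class AuthorityExtractorSketch {
      private static final String URL_PREFIX = "jdbc:hive2://";

      /** Everything between the prefix and the first '/', or to the end of the string. */
      static String getAuthorities(String uri) {
        int from = URL_PREFIX.length();
        int to = uri.indexOf('/', from);
        return (to < 0) ? uri.substring(from) : uri.substring(from, to);
      }

      public static void main(String[] args) {
        // Multiple host:port pairs for dynamic service discovery
        System.out.println(getAuthorities("jdbc:hive2://host1:10000,host2:10000,host3:10000/default"));
        // No trailing '/': the authority runs to the end of the string
        System.out.println(getAuthorities("jdbc:hive2://host1:10000"));
        // Embedded form: the authority is empty
        System.out.println("[" + getAuthorities("jdbc:hive2:///db") + "]");
      }
    }
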
Also update the host, port, jdbcUriString fields of connParams. + * + * @param connParams + * @throws ZooKeeperHiveClientException + */ + static void updateConnParamsFromZooKeeper(JdbcConnectionParams connParams) + throws ZooKeeperHiveClientException { + // Add current host to the rejected list + connParams.getRejectedHostZnodePaths().add(connParams.getCurrentHostZnodePath()); + // Get another HiveServer2 uri from ZooKeeper + String serverUriString = ZooKeeperHiveClientHelper.getNextServerUriFromZooKeeper(connParams); + // Parse serverUri to a java URI and extract host, port + URI serverUri = null; + try { + // Note URL_PREFIX is not a valid scheme format, therefore leaving it null in the constructor + // to construct a valid URI + serverUri = new URI(null, serverUriString, null, null, null); + } catch (URISyntaxException e) { + throw new ZooKeeperHiveClientException(e); + } + String oldServerHost = connParams.getHost(); + int oldServerPort = connParams.getPort(); + String newServerHost = serverUri.getHost(); + int newServerPort = serverUri.getPort(); + connParams.setHost(newServerHost); + connParams.setPort(newServerPort); + connParams.setJdbcUriString(connParams.getJdbcUriString().replace( + oldServerHost + ":" + oldServerPort, newServerHost + ":" + newServerPort)); + } + + private static String joinStringArray(String[] stringArray, String seperator) { + StringBuilder stringBuilder = new StringBuilder(); + for (int cur = 0, end = stringArray.length; cur < end; cur++) { + if (cur > 0) { + stringBuilder.append(seperator); + } + stringBuilder.append(stringArray[cur]); + } + return stringBuilder.toString(); + } + + /** * Takes a version string delimited by '.' and '-' characters * and returns a partial version. * diff --git a/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientException.java b/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientException.java new file mode 100644 index 0000000..186c676 --- /dev/null +++ b/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientException.java @@ -0,0 +1,43 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. 
+ */ + +package org.apache.hive.jdbc; + +public class ZooKeeperHiveClientException extends Exception { + + private static final long serialVersionUID = 0; + + /** + * @param cause (original exception) + */ + public ZooKeeperHiveClientException(Throwable cause) { + super(cause); + } + + /** + * @param msg (exception message) + */ + public ZooKeeperHiveClientException(String msg) { + super(msg); + } + + /** + * @param msg (exception message) + * @param cause (original exception) + */ + public ZooKeeperHiveClientException(String msg, Throwable cause) { + super(msg, cause); + } + +} diff --git a/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java b/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java new file mode 100644 index 0000000..06795a5 --- /dev/null +++ b/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.jdbc; + +import java.nio.charset.Charset; +import java.sql.SQLException; +import java.util.List; +import java.util.Random; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hive.jdbc.Utils.JdbcConnectionParams; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.ZooKeeper; + +public class ZooKeeperHiveClientHelper { + public static final Log LOG = LogFactory.getLog(ZooKeeperHiveClientHelper.class.getName()); + + /** + * A no-op watcher class + */ + public static class DummyWatcher implements Watcher { + public void process(org.apache.zookeeper.WatchedEvent event) { + } + } + + /** + * Resolve to a host:port by connecting to ZooKeeper and picking a host randomly. 
+ * + * @param uri + * @param connParams + * @return + * @throws SQLException + */ + static String getNextServerUriFromZooKeeper(JdbcConnectionParams connParams) + throws ZooKeeperHiveClientException { + String zooKeeperEnsemble = connParams.getZooKeeperEnsemble(); + String zooKeeperNamespace = + connParams.getSessionVars().get(JdbcConnectionParams.ZOOKEEPER_NAMESPACE); + List serverHosts; + Random randomizer = new Random(); + String serverNode; + // Pick a random HiveServer2 host from the ZooKeeper namspace + try { + ZooKeeper zooKeeperClient = + new ZooKeeper(zooKeeperEnsemble, JdbcConnectionParams.ZOOKEEPER_SESSION_TIMEOUT, + new ZooKeeperHiveClientHelper.DummyWatcher()); + // All the HiveServer2 host nodes that are in ZooKeeper currently + serverHosts = zooKeeperClient.getChildren("/" + zooKeeperNamespace, false); + // Remove the znodes we've already tried from this list + serverHosts.removeAll(connParams.getRejectedHostZnodePaths()); + if (serverHosts.isEmpty()) { + throw new ZooKeeperHiveClientException( + "Tried all existing HiveServer2 uris from ZooKeeper."); + } + // Now pick a host randomly + serverNode = serverHosts.get(randomizer.nextInt(serverHosts.size())); + connParams.setCurrentHostZnodePath(serverNode); + // Read the value from the node (UTF-8 enoded byte array) and convert it to a String + String serverUri = + new String(zooKeeperClient.getData("/" + zooKeeperNamespace + "/" + serverNode, false, + null), Charset.forName("UTF-8")); + LOG.info("Selected HiveServer2 instance with uri: " + serverUri); + return serverUri; + } catch (Exception e) { + throw new ZooKeeperHiveClientException("Unable to read HiveServer2 uri from ZooKeeper", e); + } + } + +} diff --git a/metastore/scripts/upgrade/mssql/hive-schema-0.14.0.mssql.sql b/metastore/scripts/upgrade/mssql/hive-schema-0.14.0.mssql.sql new file mode 100644 index 0000000..a9f7b83 --- /dev/null +++ b/metastore/scripts/upgrade/mssql/hive-schema-0.14.0.mssql.sql @@ -0,0 +1,926 @@ +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
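
getNextServerUriFromZooKeeper above reduces to: list the children of /<namespace>, drop the znodes that were already tried, pick one of the rest at random, and read its data back as a UTF-8 string. A ZooKeeper-free sketch of just the selection step (the znode names are made up):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    import java.util.Random;

    public class RandomServerPickSketch {
      /** Pick a random candidate that has not been rejected yet; null when exhausted. */
      static String pickNext(List<String> allZnodes, List<String> rejectedZnodes) {
        List<String> remaining = new ArrayList<String>(allZnodes);
        remaining.removeAll(rejectedZnodes);
        if (remaining.isEmpty()) {
          return null; // caller surfaces "Tried all existing HiveServer2 uris from ZooKeeper."
        }
        return remaining.get(new Random().nextInt(remaining.size()));
      }

      public static void main(String[] args) {
        List<String> znodes = Arrays.asList("server-0000000001", "server-0000000002");
        List<String> rejected = Arrays.asList("server-0000000001");
        System.out.println(pickNext(znodes, rejected)); // server-0000000002
      }
    }
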
+ +------------------------------------------------------------------ +-- DataNucleus SchemaTool (ran at 08/04/2014 15:10:15) +------------------------------------------------------------------ +-- Complete schema required for the following classes:- +-- org.apache.hadoop.hive.metastore.model.MColumnDescriptor +-- org.apache.hadoop.hive.metastore.model.MDBPrivilege +-- org.apache.hadoop.hive.metastore.model.MDatabase +-- org.apache.hadoop.hive.metastore.model.MDelegationToken +-- org.apache.hadoop.hive.metastore.model.MFieldSchema +-- org.apache.hadoop.hive.metastore.model.MFunction +-- org.apache.hadoop.hive.metastore.model.MGlobalPrivilege +-- org.apache.hadoop.hive.metastore.model.MIndex +-- org.apache.hadoop.hive.metastore.model.MMasterKey +-- org.apache.hadoop.hive.metastore.model.MOrder +-- org.apache.hadoop.hive.metastore.model.MPartition +-- org.apache.hadoop.hive.metastore.model.MPartitionColumnPrivilege +-- org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics +-- org.apache.hadoop.hive.metastore.model.MPartitionEvent +-- org.apache.hadoop.hive.metastore.model.MPartitionPrivilege +-- org.apache.hadoop.hive.metastore.model.MResourceUri +-- org.apache.hadoop.hive.metastore.model.MRole +-- org.apache.hadoop.hive.metastore.model.MRoleMap +-- org.apache.hadoop.hive.metastore.model.MSerDeInfo +-- org.apache.hadoop.hive.metastore.model.MStorageDescriptor +-- org.apache.hadoop.hive.metastore.model.MStringList +-- org.apache.hadoop.hive.metastore.model.MTable +-- org.apache.hadoop.hive.metastore.model.MTableColumnPrivilege +-- org.apache.hadoop.hive.metastore.model.MTableColumnStatistics +-- org.apache.hadoop.hive.metastore.model.MTablePrivilege +-- org.apache.hadoop.hive.metastore.model.MType +-- org.apache.hadoop.hive.metastore.model.MVersionTable +-- +-- Table MASTER_KEYS for classes [org.apache.hadoop.hive.metastore.model.MMasterKey] +CREATE TABLE MASTER_KEYS +( + KEY_ID int NOT NULL, + MASTER_KEY varchar(767) NULL +); + +ALTER TABLE MASTER_KEYS ADD CONSTRAINT MASTER_KEYS_PK PRIMARY KEY (KEY_ID); + +-- Table IDXS for classes [org.apache.hadoop.hive.metastore.model.MIndex] +CREATE TABLE IDXS +( + INDEX_ID bigint NOT NULL, + CREATE_TIME int NOT NULL, + DEFERRED_REBUILD bit NOT NULL, + INDEX_HANDLER_CLASS varchar(4000) NULL, + INDEX_NAME varchar(128) NULL, + INDEX_TBL_ID bigint NULL, + LAST_ACCESS_TIME int NOT NULL, + ORIG_TBL_ID bigint NULL, + SD_ID bigint NULL +); + +ALTER TABLE IDXS ADD CONSTRAINT IDXS_PK PRIMARY KEY (INDEX_ID); + +-- Table PART_COL_STATS for classes [org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics] +CREATE TABLE PART_COL_STATS +( + CS_ID bigint NOT NULL, + AVG_COL_LEN float NULL, + "COLUMN_NAME" varchar(128) NOT NULL, + COLUMN_TYPE varchar(128) NOT NULL, + DB_NAME varchar(128) NOT NULL, + BIG_DECIMAL_HIGH_VALUE varchar(255) NULL, + BIG_DECIMAL_LOW_VALUE varchar(255) NULL, + DOUBLE_HIGH_VALUE float NULL, + DOUBLE_LOW_VALUE float NULL, + LAST_ANALYZED bigint NOT NULL, + LONG_HIGH_VALUE bigint NULL, + LONG_LOW_VALUE bigint NULL, + MAX_COL_LEN bigint NULL, + NUM_DISTINCTS bigint NULL, + NUM_FALSES bigint NULL, + NUM_NULLS bigint NOT NULL, + NUM_TRUES bigint NULL, + PART_ID bigint NULL, + PARTITION_NAME varchar(767) NOT NULL, + "TABLE_NAME" varchar(128) NOT NULL +); + +ALTER TABLE PART_COL_STATS ADD CONSTRAINT PART_COL_STATS_PK PRIMARY KEY (CS_ID); + +CREATE INDEX PCS_STATS_IDX ON PART_COL_STATS (DB_NAME,TABLE_NAME,COLUMN_NAME,PARTITION_NAME); + +-- Table PART_PRIVS for classes [org.apache.hadoop.hive.metastore.model.MPartitionPrivilege] 
+CREATE TABLE PART_PRIVS +( + PART_GRANT_ID bigint NOT NULL, + CREATE_TIME int NOT NULL, + GRANT_OPTION smallint NOT NULL CHECK (GRANT_OPTION IN (0,1)), + GRANTOR varchar(128) NULL, + GRANTOR_TYPE varchar(128) NULL, + PART_ID bigint NULL, + PRINCIPAL_NAME varchar(128) NULL, + PRINCIPAL_TYPE varchar(128) NULL, + PART_PRIV varchar(128) NULL +); + +ALTER TABLE PART_PRIVS ADD CONSTRAINT PART_PRIVS_PK PRIMARY KEY (PART_GRANT_ID); + +-- Table SKEWED_STRING_LIST for classes [org.apache.hadoop.hive.metastore.model.MStringList] +CREATE TABLE SKEWED_STRING_LIST +( + STRING_LIST_ID bigint NOT NULL +); + +ALTER TABLE SKEWED_STRING_LIST ADD CONSTRAINT SKEWED_STRING_LIST_PK PRIMARY KEY (STRING_LIST_ID); + +-- Table ROLES for classes [org.apache.hadoop.hive.metastore.model.MRole] +CREATE TABLE ROLES +( + ROLE_ID bigint NOT NULL, + CREATE_TIME int NOT NULL, + OWNER_NAME varchar(128) NULL, + ROLE_NAME varchar(128) NULL +); + +ALTER TABLE ROLES ADD CONSTRAINT ROLES_PK PRIMARY KEY (ROLE_ID); + +-- Table PARTITIONS for classes [org.apache.hadoop.hive.metastore.model.MPartition] +CREATE TABLE PARTITIONS +( + PART_ID bigint NOT NULL, + CREATE_TIME int NOT NULL, + LAST_ACCESS_TIME int NOT NULL, + PART_NAME varchar(767) NULL, + SD_ID bigint NULL, + TBL_ID bigint NULL +); + +ALTER TABLE PARTITIONS ADD CONSTRAINT PARTITIONS_PK PRIMARY KEY (PART_ID); + +-- Table CDS for classes [org.apache.hadoop.hive.metastore.model.MColumnDescriptor] +CREATE TABLE CDS +( + CD_ID bigint NOT NULL +); + +ALTER TABLE CDS ADD CONSTRAINT CDS_PK PRIMARY KEY (CD_ID); + +-- Table VERSION for classes [org.apache.hadoop.hive.metastore.model.MVersionTable] +CREATE TABLE VERSION +( + VER_ID bigint NOT NULL, + SCHEMA_VERSION varchar(127) NOT NULL, + VERSION_COMMENT varchar(255) NOT NULL +); + +ALTER TABLE VERSION ADD CONSTRAINT VERSION_PK PRIMARY KEY (VER_ID); + +-- Table GLOBAL_PRIVS for classes [org.apache.hadoop.hive.metastore.model.MGlobalPrivilege] +CREATE TABLE GLOBAL_PRIVS +( + USER_GRANT_ID bigint NOT NULL, + CREATE_TIME int NOT NULL, + GRANT_OPTION smallint NOT NULL CHECK (GRANT_OPTION IN (0,1)), + GRANTOR varchar(128) NULL, + GRANTOR_TYPE varchar(128) NULL, + PRINCIPAL_NAME varchar(128) NULL, + PRINCIPAL_TYPE varchar(128) NULL, + USER_PRIV varchar(128) NULL +); + +ALTER TABLE GLOBAL_PRIVS ADD CONSTRAINT GLOBAL_PRIVS_PK PRIMARY KEY (USER_GRANT_ID); + +-- Table PART_COL_PRIVS for classes [org.apache.hadoop.hive.metastore.model.MPartitionColumnPrivilege] +CREATE TABLE PART_COL_PRIVS +( + PART_COLUMN_GRANT_ID bigint NOT NULL, + "COLUMN_NAME" varchar(128) NULL, + CREATE_TIME int NOT NULL, + GRANT_OPTION smallint NOT NULL CHECK (GRANT_OPTION IN (0,1)), + GRANTOR varchar(128) NULL, + GRANTOR_TYPE varchar(128) NULL, + PART_ID bigint NULL, + PRINCIPAL_NAME varchar(128) NULL, + PRINCIPAL_TYPE varchar(128) NULL, + PART_COL_PRIV varchar(128) NULL +); + +ALTER TABLE PART_COL_PRIVS ADD CONSTRAINT PART_COL_PRIVS_PK PRIMARY KEY (PART_COLUMN_GRANT_ID); + +-- Table DB_PRIVS for classes [org.apache.hadoop.hive.metastore.model.MDBPrivilege] +CREATE TABLE DB_PRIVS +( + DB_GRANT_ID bigint NOT NULL, + CREATE_TIME int NOT NULL, + DB_ID bigint NULL, + GRANT_OPTION smallint NOT NULL CHECK (GRANT_OPTION IN (0,1)), + GRANTOR varchar(128) NULL, + GRANTOR_TYPE varchar(128) NULL, + PRINCIPAL_NAME varchar(128) NULL, + PRINCIPAL_TYPE varchar(128) NULL, + DB_PRIV varchar(128) NULL +); + +ALTER TABLE DB_PRIVS ADD CONSTRAINT DB_PRIVS_PK PRIMARY KEY (DB_GRANT_ID); + +-- Table TAB_COL_STATS for classes [org.apache.hadoop.hive.metastore.model.MTableColumnStatistics] 
+CREATE TABLE TAB_COL_STATS +( + CS_ID bigint NOT NULL, + AVG_COL_LEN float NULL, + "COLUMN_NAME" varchar(128) NOT NULL, + COLUMN_TYPE varchar(128) NOT NULL, + DB_NAME varchar(128) NOT NULL, + BIG_DECIMAL_HIGH_VALUE varchar(255) NULL, + BIG_DECIMAL_LOW_VALUE varchar(255) NULL, + DOUBLE_HIGH_VALUE float NULL, + DOUBLE_LOW_VALUE float NULL, + LAST_ANALYZED bigint NOT NULL, + LONG_HIGH_VALUE bigint NULL, + LONG_LOW_VALUE bigint NULL, + MAX_COL_LEN bigint NULL, + NUM_DISTINCTS bigint NULL, + NUM_FALSES bigint NULL, + NUM_NULLS bigint NOT NULL, + NUM_TRUES bigint NULL, + TBL_ID bigint NULL, + "TABLE_NAME" varchar(128) NOT NULL +); + +ALTER TABLE TAB_COL_STATS ADD CONSTRAINT TAB_COL_STATS_PK PRIMARY KEY (CS_ID); + +-- Table TYPES for classes [org.apache.hadoop.hive.metastore.model.MType] +CREATE TABLE TYPES +( + TYPES_ID bigint NOT NULL, + TYPE_NAME varchar(128) NULL, + TYPE1 varchar(767) NULL, + TYPE2 varchar(767) NULL +); + +ALTER TABLE TYPES ADD CONSTRAINT TYPES_PK PRIMARY KEY (TYPES_ID); + +-- Table TBL_PRIVS for classes [org.apache.hadoop.hive.metastore.model.MTablePrivilege] +CREATE TABLE TBL_PRIVS +( + TBL_GRANT_ID bigint NOT NULL, + CREATE_TIME int NOT NULL, + GRANT_OPTION smallint NOT NULL CHECK (GRANT_OPTION IN (0,1)), + GRANTOR varchar(128) NULL, + GRANTOR_TYPE varchar(128) NULL, + PRINCIPAL_NAME varchar(128) NULL, + PRINCIPAL_TYPE varchar(128) NULL, + TBL_PRIV varchar(128) NULL, + TBL_ID bigint NULL +); + +ALTER TABLE TBL_PRIVS ADD CONSTRAINT TBL_PRIVS_PK PRIMARY KEY (TBL_GRANT_ID); + +-- Table DBS for classes [org.apache.hadoop.hive.metastore.model.MDatabase] +CREATE TABLE DBS +( + DB_ID bigint NOT NULL, + "DESC" varchar(4000) NULL, + DB_LOCATION_URI varchar(4000) NOT NULL, + "NAME" varchar(128) NULL, + OWNER_NAME varchar(128) NULL, + OWNER_TYPE varchar(10) NULL +); + +ALTER TABLE DBS ADD CONSTRAINT DBS_PK PRIMARY KEY (DB_ID); + +-- Table TBL_COL_PRIVS for classes [org.apache.hadoop.hive.metastore.model.MTableColumnPrivilege] +CREATE TABLE TBL_COL_PRIVS +( + TBL_COLUMN_GRANT_ID bigint NOT NULL, + "COLUMN_NAME" varchar(128) NULL, + CREATE_TIME int NOT NULL, + GRANT_OPTION smallint NOT NULL CHECK (GRANT_OPTION IN (0,1)), + GRANTOR varchar(128) NULL, + GRANTOR_TYPE varchar(128) NULL, + PRINCIPAL_NAME varchar(128) NULL, + PRINCIPAL_TYPE varchar(128) NULL, + TBL_COL_PRIV varchar(128) NULL, + TBL_ID bigint NULL +); + +ALTER TABLE TBL_COL_PRIVS ADD CONSTRAINT TBL_COL_PRIVS_PK PRIMARY KEY (TBL_COLUMN_GRANT_ID); + +-- Table DELEGATION_TOKENS for classes [org.apache.hadoop.hive.metastore.model.MDelegationToken] +CREATE TABLE DELEGATION_TOKENS +( + TOKEN_IDENT varchar(767) NOT NULL, + TOKEN varchar(767) NULL +); + +ALTER TABLE DELEGATION_TOKENS ADD CONSTRAINT DELEGATION_TOKENS_PK PRIMARY KEY (TOKEN_IDENT); + +-- Table SERDES for classes [org.apache.hadoop.hive.metastore.model.MSerDeInfo] +CREATE TABLE SERDES +( + SERDE_ID bigint NOT NULL, + "NAME" varchar(128) NULL, + SLIB varchar(4000) NULL +); + +ALTER TABLE SERDES ADD CONSTRAINT SERDES_PK PRIMARY KEY (SERDE_ID); + +-- Table FUNCS for classes [org.apache.hadoop.hive.metastore.model.MFunction] +CREATE TABLE FUNCS +( + FUNC_ID bigint NOT NULL, + CLASS_NAME varchar(4000) NULL, + CREATE_TIME int NOT NULL, + DB_ID bigint NULL, + FUNC_NAME varchar(128) NULL, + FUNC_TYPE int NOT NULL, + OWNER_NAME varchar(128) NULL, + OWNER_TYPE varchar(10) NULL +); + +ALTER TABLE FUNCS ADD CONSTRAINT FUNCS_PK PRIMARY KEY (FUNC_ID); + +-- Table ROLE_MAP for classes [org.apache.hadoop.hive.metastore.model.MRoleMap] +CREATE TABLE ROLE_MAP +( + ROLE_GRANT_ID bigint 
NOT NULL, + ADD_TIME int NOT NULL, + GRANT_OPTION smallint NOT NULL CHECK (GRANT_OPTION IN (0,1)), + GRANTOR varchar(128) NULL, + GRANTOR_TYPE varchar(128) NULL, + PRINCIPAL_NAME varchar(128) NULL, + PRINCIPAL_TYPE varchar(128) NULL, + ROLE_ID bigint NULL +); + +ALTER TABLE ROLE_MAP ADD CONSTRAINT ROLE_MAP_PK PRIMARY KEY (ROLE_GRANT_ID); + +-- Table TBLS for classes [org.apache.hadoop.hive.metastore.model.MTable] +CREATE TABLE TBLS +( + TBL_ID bigint NOT NULL, + CREATE_TIME int NOT NULL, + DB_ID bigint NULL, + LAST_ACCESS_TIME int NOT NULL, + OWNER varchar(767) NULL, + RETENTION int NOT NULL, + SD_ID bigint NULL, + TBL_NAME varchar(128) NULL, + TBL_TYPE varchar(128) NULL, + VIEW_EXPANDED_TEXT text NULL, + VIEW_ORIGINAL_TEXT text NULL +); + +ALTER TABLE TBLS ADD CONSTRAINT TBLS_PK PRIMARY KEY (TBL_ID); + +-- Table SDS for classes [org.apache.hadoop.hive.metastore.model.MStorageDescriptor] +CREATE TABLE SDS +( + SD_ID bigint NOT NULL, + CD_ID bigint NULL, + INPUT_FORMAT varchar(4000) NULL, + IS_COMPRESSED bit NOT NULL, + IS_STOREDASSUBDIRECTORIES bit NOT NULL, + LOCATION varchar(4000) NULL, + NUM_BUCKETS int NOT NULL, + OUTPUT_FORMAT varchar(4000) NULL, + SERDE_ID bigint NULL +); + +ALTER TABLE SDS ADD CONSTRAINT SDS_PK PRIMARY KEY (SD_ID); + +-- Table PARTITION_EVENTS for classes [org.apache.hadoop.hive.metastore.model.MPartitionEvent] +CREATE TABLE PARTITION_EVENTS +( + PART_NAME_ID bigint NOT NULL, + DB_NAME varchar(128) NULL, + EVENT_TIME bigint NOT NULL, + EVENT_TYPE int NOT NULL, + PARTITION_NAME varchar(767) NULL, + TBL_NAME varchar(128) NULL +); + +ALTER TABLE PARTITION_EVENTS ADD CONSTRAINT PARTITION_EVENTS_PK PRIMARY KEY (PART_NAME_ID); + +-- Table SORT_COLS for join relationship +CREATE TABLE SORT_COLS +( + SD_ID bigint NOT NULL, + "COLUMN_NAME" varchar(128) NULL, + "ORDER" int NOT NULL, + INTEGER_IDX int NOT NULL +); + +ALTER TABLE SORT_COLS ADD CONSTRAINT SORT_COLS_PK PRIMARY KEY (SD_ID,INTEGER_IDX); + +-- Table SKEWED_COL_NAMES for join relationship +CREATE TABLE SKEWED_COL_NAMES +( + SD_ID bigint NOT NULL, + SKEWED_COL_NAME varchar(255) NULL, + INTEGER_IDX int NOT NULL +); + +ALTER TABLE SKEWED_COL_NAMES ADD CONSTRAINT SKEWED_COL_NAMES_PK PRIMARY KEY (SD_ID,INTEGER_IDX); + +-- Table SKEWED_COL_VALUE_LOC_MAP for join relationship +CREATE TABLE SKEWED_COL_VALUE_LOC_MAP +( + SD_ID bigint NOT NULL, + STRING_LIST_ID_KID bigint NOT NULL, + LOCATION varchar(4000) NULL +); + +ALTER TABLE SKEWED_COL_VALUE_LOC_MAP ADD CONSTRAINT SKEWED_COL_VALUE_LOC_MAP_PK PRIMARY KEY (SD_ID,STRING_LIST_ID_KID); + +-- Table SKEWED_STRING_LIST_VALUES for join relationship +CREATE TABLE SKEWED_STRING_LIST_VALUES +( + STRING_LIST_ID bigint NOT NULL, + STRING_LIST_VALUE varchar(255) NULL, + INTEGER_IDX int NOT NULL +); + +ALTER TABLE SKEWED_STRING_LIST_VALUES ADD CONSTRAINT SKEWED_STRING_LIST_VALUES_PK PRIMARY KEY (STRING_LIST_ID,INTEGER_IDX); + +-- Table PARTITION_KEY_VALS for join relationship +CREATE TABLE PARTITION_KEY_VALS +( + PART_ID bigint NOT NULL, + PART_KEY_VAL varchar(255) NULL, + INTEGER_IDX int NOT NULL +); + +ALTER TABLE PARTITION_KEY_VALS ADD CONSTRAINT PARTITION_KEY_VALS_PK PRIMARY KEY (PART_ID,INTEGER_IDX); + +-- Table PARTITION_KEYS for join relationship +CREATE TABLE PARTITION_KEYS +( + TBL_ID bigint NOT NULL, + PKEY_COMMENT varchar(4000) NULL, + PKEY_NAME varchar(128) NOT NULL, + PKEY_TYPE varchar(767) NOT NULL, + INTEGER_IDX int NOT NULL +); + +ALTER TABLE PARTITION_KEYS ADD CONSTRAINT PARTITION_KEY_PK PRIMARY KEY (TBL_ID,PKEY_NAME); + +-- Table SKEWED_VALUES for join relationship 
+CREATE TABLE SKEWED_VALUES +( + SD_ID_OID bigint NOT NULL, + STRING_LIST_ID_EID bigint NULL, + INTEGER_IDX int NOT NULL +); + +ALTER TABLE SKEWED_VALUES ADD CONSTRAINT SKEWED_VALUES_PK PRIMARY KEY (SD_ID_OID,INTEGER_IDX); + +-- Table SD_PARAMS for join relationship +CREATE TABLE SD_PARAMS +( + SD_ID bigint NOT NULL, + PARAM_KEY varchar(256) NOT NULL, + PARAM_VALUE varchar(4000) NULL +); + +ALTER TABLE SD_PARAMS ADD CONSTRAINT SD_PARAMS_PK PRIMARY KEY (SD_ID,PARAM_KEY); + +-- Table FUNC_RU for join relationship +CREATE TABLE FUNC_RU +( + FUNC_ID bigint NOT NULL, + RESOURCE_TYPE int NOT NULL, + RESOURCE_URI varchar(4000) NULL, + INTEGER_IDX int NOT NULL +); + +ALTER TABLE FUNC_RU ADD CONSTRAINT FUNC_RU_PK PRIMARY KEY (FUNC_ID,INTEGER_IDX); + +-- Table TYPE_FIELDS for join relationship +CREATE TABLE TYPE_FIELDS +( + TYPE_NAME bigint NOT NULL, + COMMENT varchar(256) NULL, + FIELD_NAME varchar(128) NOT NULL, + FIELD_TYPE varchar(767) NOT NULL, + INTEGER_IDX int NOT NULL +); + +ALTER TABLE TYPE_FIELDS ADD CONSTRAINT TYPE_FIELDS_PK PRIMARY KEY (TYPE_NAME,FIELD_NAME); + +-- Table BUCKETING_COLS for join relationship +CREATE TABLE BUCKETING_COLS +( + SD_ID bigint NOT NULL, + BUCKET_COL_NAME varchar(255) NULL, + INTEGER_IDX int NOT NULL +); + +ALTER TABLE BUCKETING_COLS ADD CONSTRAINT BUCKETING_COLS_PK PRIMARY KEY (SD_ID,INTEGER_IDX); + +-- Table DATABASE_PARAMS for join relationship +CREATE TABLE DATABASE_PARAMS +( + DB_ID bigint NOT NULL, + PARAM_KEY varchar(180) NOT NULL, + PARAM_VALUE varchar(4000) NULL +); + +ALTER TABLE DATABASE_PARAMS ADD CONSTRAINT DATABASE_PARAMS_PK PRIMARY KEY (DB_ID,PARAM_KEY); + +-- Table INDEX_PARAMS for join relationship +CREATE TABLE INDEX_PARAMS +( + INDEX_ID bigint NOT NULL, + PARAM_KEY varchar(256) NOT NULL, + PARAM_VALUE varchar(4000) NULL +); + +ALTER TABLE INDEX_PARAMS ADD CONSTRAINT INDEX_PARAMS_PK PRIMARY KEY (INDEX_ID,PARAM_KEY); + +-- Table COLUMNS_V2 for join relationship +CREATE TABLE COLUMNS_V2 +( + CD_ID bigint NOT NULL, + COMMENT varchar(256) NULL, + "COLUMN_NAME" varchar(128) NOT NULL, + TYPE_NAME varchar(4000) NOT NULL, + INTEGER_IDX int NOT NULL +); + +ALTER TABLE COLUMNS_V2 ADD CONSTRAINT COLUMNS_PK PRIMARY KEY (CD_ID,"COLUMN_NAME"); + +-- Table SERDE_PARAMS for join relationship +CREATE TABLE SERDE_PARAMS +( + SERDE_ID bigint NOT NULL, + PARAM_KEY varchar(256) NOT NULL, + PARAM_VALUE varchar(4000) NULL +); + +ALTER TABLE SERDE_PARAMS ADD CONSTRAINT SERDE_PARAMS_PK PRIMARY KEY (SERDE_ID,PARAM_KEY); + +-- Table PARTITION_PARAMS for join relationship +CREATE TABLE PARTITION_PARAMS +( + PART_ID bigint NOT NULL, + PARAM_KEY varchar(256) NOT NULL, + PARAM_VALUE varchar(4000) NULL +); + +ALTER TABLE PARTITION_PARAMS ADD CONSTRAINT PARTITION_PARAMS_PK PRIMARY KEY (PART_ID,PARAM_KEY); + +-- Table TABLE_PARAMS for join relationship +CREATE TABLE TABLE_PARAMS +( + TBL_ID bigint NOT NULL, + PARAM_KEY varchar(256) NOT NULL, + PARAM_VALUE varchar(4000) NULL +); + +ALTER TABLE TABLE_PARAMS ADD CONSTRAINT TABLE_PARAMS_PK PRIMARY KEY (TBL_ID,PARAM_KEY); + +-- Constraints for table MASTER_KEYS for class(es) [org.apache.hadoop.hive.metastore.model.MMasterKey] + +-- Constraints for table IDXS for class(es) [org.apache.hadoop.hive.metastore.model.MIndex] +ALTER TABLE IDXS ADD CONSTRAINT IDXS_FK1 FOREIGN KEY (INDEX_TBL_ID) REFERENCES TBLS (TBL_ID) ; + +ALTER TABLE IDXS ADD CONSTRAINT IDXS_FK2 FOREIGN KEY (SD_ID) REFERENCES SDS (SD_ID) ; + +ALTER TABLE IDXS ADD CONSTRAINT IDXS_FK3 FOREIGN KEY (ORIG_TBL_ID) REFERENCES TBLS (TBL_ID) ; + +CREATE UNIQUE INDEX UNIQUEINDEX ON 
IDXS (INDEX_NAME,ORIG_TBL_ID); + +CREATE INDEX IDXS_N51 ON IDXS (SD_ID); + +CREATE INDEX IDXS_N50 ON IDXS (ORIG_TBL_ID); + +CREATE INDEX IDXS_N49 ON IDXS (INDEX_TBL_ID); + + +-- Constraints for table PART_COL_STATS for class(es) [org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics] +ALTER TABLE PART_COL_STATS ADD CONSTRAINT PART_COL_STATS_FK1 FOREIGN KEY (PART_ID) REFERENCES PARTITIONS (PART_ID) ; + +CREATE INDEX PART_COL_STATS_N49 ON PART_COL_STATS (PART_ID); + + +-- Constraints for table PART_PRIVS for class(es) [org.apache.hadoop.hive.metastore.model.MPartitionPrivilege] +ALTER TABLE PART_PRIVS ADD CONSTRAINT PART_PRIVS_FK1 FOREIGN KEY (PART_ID) REFERENCES PARTITIONS (PART_ID) ; + +CREATE INDEX PARTPRIVILEGEINDEX ON PART_PRIVS (PART_ID,PRINCIPAL_NAME,PRINCIPAL_TYPE,PART_PRIV,GRANTOR,GRANTOR_TYPE); + +CREATE INDEX PART_PRIVS_N49 ON PART_PRIVS (PART_ID); + + +-- Constraints for table SKEWED_STRING_LIST for class(es) [org.apache.hadoop.hive.metastore.model.MStringList] + +-- Constraints for table ROLES for class(es) [org.apache.hadoop.hive.metastore.model.MRole] +CREATE UNIQUE INDEX ROLEENTITYINDEX ON ROLES (ROLE_NAME); + + +-- Constraints for table PARTITIONS for class(es) [org.apache.hadoop.hive.metastore.model.MPartition] +ALTER TABLE PARTITIONS ADD CONSTRAINT PARTITIONS_FK1 FOREIGN KEY (TBL_ID) REFERENCES TBLS (TBL_ID) ; + +ALTER TABLE PARTITIONS ADD CONSTRAINT PARTITIONS_FK2 FOREIGN KEY (SD_ID) REFERENCES SDS (SD_ID) ; + +CREATE INDEX PARTITIONS_N49 ON PARTITIONS (SD_ID); + +CREATE INDEX PARTITIONS_N50 ON PARTITIONS (TBL_ID); + +CREATE UNIQUE INDEX UNIQUEPARTITION ON PARTITIONS (PART_NAME,TBL_ID); + + +-- Constraints for table CDS for class(es) [org.apache.hadoop.hive.metastore.model.MColumnDescriptor] + +-- Constraints for table VERSION for class(es) [org.apache.hadoop.hive.metastore.model.MVersionTable] + +-- Constraints for table GLOBAL_PRIVS for class(es) [org.apache.hadoop.hive.metastore.model.MGlobalPrivilege] +CREATE UNIQUE INDEX GLOBALPRIVILEGEINDEX ON GLOBAL_PRIVS (PRINCIPAL_NAME,PRINCIPAL_TYPE,USER_PRIV,GRANTOR,GRANTOR_TYPE); + + +-- Constraints for table PART_COL_PRIVS for class(es) [org.apache.hadoop.hive.metastore.model.MPartitionColumnPrivilege] +ALTER TABLE PART_COL_PRIVS ADD CONSTRAINT PART_COL_PRIVS_FK1 FOREIGN KEY (PART_ID) REFERENCES PARTITIONS (PART_ID) ; + +CREATE INDEX PART_COL_PRIVS_N49 ON PART_COL_PRIVS (PART_ID); + +CREATE INDEX PARTITIONCOLUMNPRIVILEGEINDEX ON PART_COL_PRIVS (PART_ID,"COLUMN_NAME",PRINCIPAL_NAME,PRINCIPAL_TYPE,PART_COL_PRIV,GRANTOR,GRANTOR_TYPE); + + +-- Constraints for table DB_PRIVS for class(es) [org.apache.hadoop.hive.metastore.model.MDBPrivilege] +ALTER TABLE DB_PRIVS ADD CONSTRAINT DB_PRIVS_FK1 FOREIGN KEY (DB_ID) REFERENCES DBS (DB_ID) ; + +CREATE UNIQUE INDEX DBPRIVILEGEINDEX ON DB_PRIVS (DB_ID,PRINCIPAL_NAME,PRINCIPAL_TYPE,DB_PRIV,GRANTOR,GRANTOR_TYPE); + +CREATE INDEX DB_PRIVS_N49 ON DB_PRIVS (DB_ID); + + +-- Constraints for table TAB_COL_STATS for class(es) [org.apache.hadoop.hive.metastore.model.MTableColumnStatistics] +ALTER TABLE TAB_COL_STATS ADD CONSTRAINT TAB_COL_STATS_FK1 FOREIGN KEY (TBL_ID) REFERENCES TBLS (TBL_ID) ; + +CREATE INDEX TAB_COL_STATS_N49 ON TAB_COL_STATS (TBL_ID); + + +-- Constraints for table TYPES for class(es) [org.apache.hadoop.hive.metastore.model.MType] +CREATE UNIQUE INDEX UNIQUETYPE ON TYPES (TYPE_NAME); + + +-- Constraints for table TBL_PRIVS for class(es) [org.apache.hadoop.hive.metastore.model.MTablePrivilege] +ALTER TABLE TBL_PRIVS ADD CONSTRAINT TBL_PRIVS_FK1 FOREIGN KEY (TBL_ID) 
REFERENCES TBLS (TBL_ID) ; + +CREATE INDEX TBL_PRIVS_N49 ON TBL_PRIVS (TBL_ID); + +CREATE INDEX TABLEPRIVILEGEINDEX ON TBL_PRIVS (TBL_ID,PRINCIPAL_NAME,PRINCIPAL_TYPE,TBL_PRIV,GRANTOR,GRANTOR_TYPE); + + +-- Constraints for table DBS for class(es) [org.apache.hadoop.hive.metastore.model.MDatabase] +CREATE UNIQUE INDEX UNIQUEDATABASE ON DBS ("NAME"); + + +-- Constraints for table TBL_COL_PRIVS for class(es) [org.apache.hadoop.hive.metastore.model.MTableColumnPrivilege] +ALTER TABLE TBL_COL_PRIVS ADD CONSTRAINT TBL_COL_PRIVS_FK1 FOREIGN KEY (TBL_ID) REFERENCES TBLS (TBL_ID) ; + +CREATE INDEX TABLECOLUMNPRIVILEGEINDEX ON TBL_COL_PRIVS (TBL_ID,"COLUMN_NAME",PRINCIPAL_NAME,PRINCIPAL_TYPE,TBL_COL_PRIV,GRANTOR,GRANTOR_TYPE); + +CREATE INDEX TBL_COL_PRIVS_N49 ON TBL_COL_PRIVS (TBL_ID); + + +-- Constraints for table DELEGATION_TOKENS for class(es) [org.apache.hadoop.hive.metastore.model.MDelegationToken] + +-- Constraints for table SERDES for class(es) [org.apache.hadoop.hive.metastore.model.MSerDeInfo] + +-- Constraints for table FUNCS for class(es) [org.apache.hadoop.hive.metastore.model.MFunction] +ALTER TABLE FUNCS ADD CONSTRAINT FUNCS_FK1 FOREIGN KEY (DB_ID) REFERENCES DBS (DB_ID) ; + +CREATE UNIQUE INDEX UNIQUEFUNCTION ON FUNCS (FUNC_NAME,DB_ID); + +CREATE INDEX FUNCS_N49 ON FUNCS (DB_ID); + + +-- Constraints for table ROLE_MAP for class(es) [org.apache.hadoop.hive.metastore.model.MRoleMap] +ALTER TABLE ROLE_MAP ADD CONSTRAINT ROLE_MAP_FK1 FOREIGN KEY (ROLE_ID) REFERENCES ROLES (ROLE_ID) ; + +CREATE INDEX ROLE_MAP_N49 ON ROLE_MAP (ROLE_ID); + +CREATE UNIQUE INDEX USERROLEMAPINDEX ON ROLE_MAP (PRINCIPAL_NAME,ROLE_ID,GRANTOR,GRANTOR_TYPE); + + +-- Constraints for table TBLS for class(es) [org.apache.hadoop.hive.metastore.model.MTable] +ALTER TABLE TBLS ADD CONSTRAINT TBLS_FK2 FOREIGN KEY (SD_ID) REFERENCES SDS (SD_ID) ; + +ALTER TABLE TBLS ADD CONSTRAINT TBLS_FK1 FOREIGN KEY (DB_ID) REFERENCES DBS (DB_ID) ; + +CREATE INDEX TBLS_N50 ON TBLS (SD_ID); + +CREATE UNIQUE INDEX UNIQUETABLE ON TBLS (TBL_NAME,DB_ID); + +CREATE INDEX TBLS_N49 ON TBLS (DB_ID); + + +-- Constraints for table SDS for class(es) [org.apache.hadoop.hive.metastore.model.MStorageDescriptor] +ALTER TABLE SDS ADD CONSTRAINT SDS_FK1 FOREIGN KEY (SERDE_ID) REFERENCES SERDES (SERDE_ID) ; + +ALTER TABLE SDS ADD CONSTRAINT SDS_FK2 FOREIGN KEY (CD_ID) REFERENCES CDS (CD_ID) ; + +CREATE INDEX SDS_N50 ON SDS (CD_ID); + +CREATE INDEX SDS_N49 ON SDS (SERDE_ID); + + +-- Constraints for table PARTITION_EVENTS for class(es) [org.apache.hadoop.hive.metastore.model.MPartitionEvent] +CREATE INDEX PARTITIONEVENTINDEX ON PARTITION_EVENTS (PARTITION_NAME); + + +-- Constraints for table SORT_COLS +ALTER TABLE SORT_COLS ADD CONSTRAINT SORT_COLS_FK1 FOREIGN KEY (SD_ID) REFERENCES SDS (SD_ID) ; + +CREATE INDEX SORT_COLS_N49 ON SORT_COLS (SD_ID); + + +-- Constraints for table SKEWED_COL_NAMES +ALTER TABLE SKEWED_COL_NAMES ADD CONSTRAINT SKEWED_COL_NAMES_FK1 FOREIGN KEY (SD_ID) REFERENCES SDS (SD_ID) ; + +CREATE INDEX SKEWED_COL_NAMES_N49 ON SKEWED_COL_NAMES (SD_ID); + + +-- Constraints for table SKEWED_COL_VALUE_LOC_MAP +ALTER TABLE SKEWED_COL_VALUE_LOC_MAP ADD CONSTRAINT SKEWED_COL_VALUE_LOC_MAP_FK1 FOREIGN KEY (SD_ID) REFERENCES SDS (SD_ID) ; + +ALTER TABLE SKEWED_COL_VALUE_LOC_MAP ADD CONSTRAINT SKEWED_COL_VALUE_LOC_MAP_FK2 FOREIGN KEY (STRING_LIST_ID_KID) REFERENCES SKEWED_STRING_LIST (STRING_LIST_ID) ; + +CREATE INDEX SKEWED_COL_VALUE_LOC_MAP_N50 ON SKEWED_COL_VALUE_LOC_MAP (STRING_LIST_ID_KID); + +CREATE INDEX SKEWED_COL_VALUE_LOC_MAP_N49 ON 
SKEWED_COL_VALUE_LOC_MAP (SD_ID); + + +-- Constraints for table SKEWED_STRING_LIST_VALUES +ALTER TABLE SKEWED_STRING_LIST_VALUES ADD CONSTRAINT SKEWED_STRING_LIST_VALUES_FK1 FOREIGN KEY (STRING_LIST_ID) REFERENCES SKEWED_STRING_LIST (STRING_LIST_ID) ; + +CREATE INDEX SKEWED_STRING_LIST_VALUES_N49 ON SKEWED_STRING_LIST_VALUES (STRING_LIST_ID); + + +-- Constraints for table PARTITION_KEY_VALS +ALTER TABLE PARTITION_KEY_VALS ADD CONSTRAINT PARTITION_KEY_VALS_FK1 FOREIGN KEY (PART_ID) REFERENCES PARTITIONS (PART_ID) ; + +CREATE INDEX PARTITION_KEY_VALS_N49 ON PARTITION_KEY_VALS (PART_ID); + + +-- Constraints for table PARTITION_KEYS +ALTER TABLE PARTITION_KEYS ADD CONSTRAINT PARTITION_KEYS_FK1 FOREIGN KEY (TBL_ID) REFERENCES TBLS (TBL_ID) ; + +CREATE INDEX PARTITION_KEYS_N49 ON PARTITION_KEYS (TBL_ID); + + +-- Constraints for table SKEWED_VALUES +ALTER TABLE SKEWED_VALUES ADD CONSTRAINT SKEWED_VALUES_FK1 FOREIGN KEY (SD_ID_OID) REFERENCES SDS (SD_ID) ; + +ALTER TABLE SKEWED_VALUES ADD CONSTRAINT SKEWED_VALUES_FK2 FOREIGN KEY (STRING_LIST_ID_EID) REFERENCES SKEWED_STRING_LIST (STRING_LIST_ID) ; + +CREATE INDEX SKEWED_VALUES_N50 ON SKEWED_VALUES (SD_ID_OID); + +CREATE INDEX SKEWED_VALUES_N49 ON SKEWED_VALUES (STRING_LIST_ID_EID); + + +-- Constraints for table SD_PARAMS +ALTER TABLE SD_PARAMS ADD CONSTRAINT SD_PARAMS_FK1 FOREIGN KEY (SD_ID) REFERENCES SDS (SD_ID) ; + +CREATE INDEX SD_PARAMS_N49 ON SD_PARAMS (SD_ID); + + +-- Constraints for table FUNC_RU +ALTER TABLE FUNC_RU ADD CONSTRAINT FUNC_RU_FK1 FOREIGN KEY (FUNC_ID) REFERENCES FUNCS (FUNC_ID) ; + +CREATE INDEX FUNC_RU_N49 ON FUNC_RU (FUNC_ID); + + +-- Constraints for table TYPE_FIELDS +ALTER TABLE TYPE_FIELDS ADD CONSTRAINT TYPE_FIELDS_FK1 FOREIGN KEY (TYPE_NAME) REFERENCES TYPES (TYPES_ID) ; + +CREATE INDEX TYPE_FIELDS_N49 ON TYPE_FIELDS (TYPE_NAME); + + +-- Constraints for table BUCKETING_COLS +ALTER TABLE BUCKETING_COLS ADD CONSTRAINT BUCKETING_COLS_FK1 FOREIGN KEY (SD_ID) REFERENCES SDS (SD_ID) ; + +CREATE INDEX BUCKETING_COLS_N49 ON BUCKETING_COLS (SD_ID); + + +-- Constraints for table DATABASE_PARAMS +ALTER TABLE DATABASE_PARAMS ADD CONSTRAINT DATABASE_PARAMS_FK1 FOREIGN KEY (DB_ID) REFERENCES DBS (DB_ID) ; + +CREATE INDEX DATABASE_PARAMS_N49 ON DATABASE_PARAMS (DB_ID); + + +-- Constraints for table INDEX_PARAMS +ALTER TABLE INDEX_PARAMS ADD CONSTRAINT INDEX_PARAMS_FK1 FOREIGN KEY (INDEX_ID) REFERENCES IDXS (INDEX_ID) ; + +CREATE INDEX INDEX_PARAMS_N49 ON INDEX_PARAMS (INDEX_ID); + + +-- Constraints for table COLUMNS_V2 +ALTER TABLE COLUMNS_V2 ADD CONSTRAINT COLUMNS_V2_FK1 FOREIGN KEY (CD_ID) REFERENCES CDS (CD_ID) ; + +CREATE INDEX COLUMNS_V2_N49 ON COLUMNS_V2 (CD_ID); + + +-- Constraints for table SERDE_PARAMS +ALTER TABLE SERDE_PARAMS ADD CONSTRAINT SERDE_PARAMS_FK1 FOREIGN KEY (SERDE_ID) REFERENCES SERDES (SERDE_ID) ; + +CREATE INDEX SERDE_PARAMS_N49 ON SERDE_PARAMS (SERDE_ID); + + +-- Constraints for table PARTITION_PARAMS +ALTER TABLE PARTITION_PARAMS ADD CONSTRAINT PARTITION_PARAMS_FK1 FOREIGN KEY (PART_ID) REFERENCES PARTITIONS (PART_ID) ; + +CREATE INDEX PARTITION_PARAMS_N49 ON PARTITION_PARAMS (PART_ID); + + +-- Constraints for table TABLE_PARAMS +ALTER TABLE TABLE_PARAMS ADD CONSTRAINT TABLE_PARAMS_FK1 FOREIGN KEY (TBL_ID) REFERENCES TBLS (TBL_ID) ; + +CREATE INDEX TABLE_PARAMS_N49 ON TABLE_PARAMS (TBL_ID); + + + +-- ----------------------------------------------------------------------------------------------------------------------------------------------- +-- Transaction and Lock Tables +-- These are not part of package 
jdo, so if you are going to regenerate this file you need to manually add the following section back to the file. +-- ----------------------------------------------------------------------------------------------------------------------------------------------- +CREATE TABLE COMPACTION_QUEUE( + CQ_ID int NOT NULL, + CQ_DATABASE varchar(128) NOT NULL, + CQ_TABLE varchar(128) NOT NULL, + CQ_PARTITION varchar(767) NULL, + CQ_STATE char(1) NOT NULL, + CQ_TYPE char(1) NOT NULL, + CQ_WORKER_ID varchar(128) NULL, + CQ_START int NULL, + CQ_RUN_AS varchar(128) NULL, +PRIMARY KEY CLUSTERED +( + CQ_ID ASC +) +); + +CREATE TABLE COMPLETED_TXN_COMPONENTS( + CTC_TXNID int NULL, + CTC_DATABASE varchar(128) NOT NULL, + CTC_TABLE varchar(128) NULL, + CTC_PARTITION varchar(767) NULL +); + +CREATE TABLE HIVE_LOCKS( + HL_LOCK_EXT_ID int NOT NULL, + HL_LOCK_INT_ID int NOT NULL, + HL_TXNID int NULL, + HL_DB varchar(128) NOT NULL, + HL_TABLE varchar(128) NULL, + HL_PARTITION varchar(767) NULL, + HL_LOCK_STATE char(1) NOT NULL, + HL_LOCK_TYPE char(1) NOT NULL, + HL_LAST_HEARTBEAT int NOT NULL, + HL_ACQUIRED_AT int NULL, + HL_USER varchar(128) NOT NULL, + HL_HOST varchar(128) NOT NULL, +PRIMARY KEY CLUSTERED +( + HL_LOCK_EXT_ID ASC, + HL_LOCK_INT_ID ASC +) +); + +CREATE TABLE NEXT_COMPACTION_QUEUE_ID( + NCQ_NEXT int NOT NULL +); + +INSERT INTO NEXT_COMPACTION_QUEUE_ID VALUES(1); + +CREATE TABLE NEXT_LOCK_ID( + NL_NEXT int NOT NULL +); + +INSERT INTO NEXT_LOCK_ID VALUES(1); + +CREATE TABLE NEXT_TXN_ID( + NTXN_NEXT int NOT NULL +); + +INSERT INTO NEXT_TXN_ID VALUES(1); + +CREATE TABLE TXNS( + TXN_ID int NOT NULL, + TXN_STATE char(1) NOT NULL, + TXN_STARTED int NOT NULL, + TXN_LAST_HEARTBEAT int NOT NULL, + TXN_USER varchar(128) NOT NULL, + TXN_HOST varchar(128) NOT NULL, +PRIMARY KEY CLUSTERED +( + TXN_ID ASC +) +); + +CREATE TABLE TXN_COMPONENTS( + TC_TXNID int NULL, + TC_DATABASE varchar(128) NOT NULL, + TC_TABLE varchar(128) NULL, + TC_PARTITION varchar(767) NULL +); + +ALTER TABLE TXN_COMPONENTS WITH CHECK ADD FOREIGN KEY(TC_TXNID) REFERENCES TXNS (TXN_ID); + + + +-- ----------------------------------------------------------------- +-- Record schema version. 
Should be the last step in the init script +-- ----------------------------------------------------------------- +INSERT INTO VERSION (VER_ID, SCHEMA_VERSION, VERSION_COMMENT) VALUES (1, '0.14.0', 'Hive release version 0.14.0'); diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/FieldSchema.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/FieldSchema.java index a993810..c84fa29 100644 --- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/FieldSchema.java +++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/FieldSchema.java @@ -135,9 +135,9 @@ public FieldSchema( String comment) { this(); - this.name = name; - this.type = type; - this.comment = comment; + this.name = org.apache.hive.common.util.HiveStringUtils.intern(name); + this.type = org.apache.hive.common.util.HiveStringUtils.intern(type); + this.comment = org.apache.hive.common.util.HiveStringUtils.intern(comment); } /** @@ -145,13 +145,13 @@ public FieldSchema( */ public FieldSchema(FieldSchema other) { if (other.isSetName()) { - this.name = other.name; + this.name = org.apache.hive.common.util.HiveStringUtils.intern(other.name); } if (other.isSetType()) { - this.type = other.type; + this.type = org.apache.hive.common.util.HiveStringUtils.intern(other.type); } if (other.isSetComment()) { - this.comment = other.comment; + this.comment = org.apache.hive.common.util.HiveStringUtils.intern(other.comment); } } @@ -171,7 +171,7 @@ public String getName() { } public void setName(String name) { - this.name = name; + this.name = org.apache.hive.common.util.HiveStringUtils.intern(name); } public void unsetName() { @@ -194,7 +194,7 @@ public String getType() { } public void setType(String type) { - this.type = type; + this.type = org.apache.hive.common.util.HiveStringUtils.intern(type); } public void unsetType() { @@ -217,7 +217,7 @@ public String getComment() { } public void setComment(String comment) { - this.comment = comment; + this.comment = org.apache.hive.common.util.HiveStringUtils.intern(comment); } public void unsetComment() { diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java index 312807e..242d54d 100644 --- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java +++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java @@ -182,14 +182,14 @@ public Partition( { this(); this.values = values; - this.dbName = dbName; - this.tableName = tableName; + this.dbName = org.apache.hive.common.util.HiveStringUtils.intern(dbName); + this.tableName = org.apache.hive.common.util.HiveStringUtils.intern(tableName); this.createTime = createTime; setCreateTimeIsSet(true); this.lastAccessTime = lastAccessTime; setLastAccessTimeIsSet(true); this.sd = sd; - this.parameters = parameters; + this.parameters = org.apache.hive.common.util.HiveStringUtils.intern(parameters); } /** @@ -205,10 +205,10 @@ public Partition(Partition other) { this.values = __this__values; } if (other.isSetDbName()) { - this.dbName = other.dbName; + this.dbName = org.apache.hive.common.util.HiveStringUtils.intern(other.dbName); } if (other.isSetTableName()) { - this.tableName = other.tableName; + this.tableName = org.apache.hive.common.util.HiveStringUtils.intern(other.tableName); } this.createTime = other.createTime; this.lastAccessTime = 
other.lastAccessTime; @@ -222,9 +222,9 @@ public Partition(Partition other) { String other_element_key = other_element.getKey(); String other_element_value = other_element.getValue(); - String __this__parameters_copy_key = other_element_key; + String __this__parameters_copy_key = org.apache.hive.common.util.HiveStringUtils.intern(other_element_key); - String __this__parameters_copy_value = other_element_value; + String __this__parameters_copy_value = org.apache.hive.common.util.HiveStringUtils.intern(other_element_value); __this__parameters.put(__this__parameters_copy_key, __this__parameters_copy_value); } @@ -296,7 +296,7 @@ public String getDbName() { } public void setDbName(String dbName) { - this.dbName = dbName; + this.dbName = org.apache.hive.common.util.HiveStringUtils.intern(dbName); } public void unsetDbName() { @@ -319,7 +319,7 @@ public String getTableName() { } public void setTableName(String tableName) { - this.tableName = tableName; + this.tableName = org.apache.hive.common.util.HiveStringUtils.intern(tableName); } public void unsetTableName() { @@ -420,7 +420,7 @@ public void putToParameters(String key, String val) { } public void setParameters(Map parameters) { - this.parameters = parameters; + this.parameters = org.apache.hive.common.util.HiveStringUtils.intern(parameters); } public void unsetParameters() { diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java index 24d65bb..2466d8f 100644 --- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java +++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java @@ -137,9 +137,9 @@ public SerDeInfo( Map parameters) { this(); - this.name = name; - this.serializationLib = serializationLib; - this.parameters = parameters; + this.name = org.apache.hive.common.util.HiveStringUtils.intern(name); + this.serializationLib = org.apache.hive.common.util.HiveStringUtils.intern(serializationLib); + this.parameters = org.apache.hive.common.util.HiveStringUtils.intern(parameters); } /** @@ -147,10 +147,10 @@ public SerDeInfo( */ public SerDeInfo(SerDeInfo other) { if (other.isSetName()) { - this.name = other.name; + this.name = org.apache.hive.common.util.HiveStringUtils.intern(other.name); } if (other.isSetSerializationLib()) { - this.serializationLib = other.serializationLib; + this.serializationLib = org.apache.hive.common.util.HiveStringUtils.intern(other.serializationLib); } if (other.isSetParameters()) { Map __this__parameters = new HashMap(); @@ -159,9 +159,9 @@ public SerDeInfo(SerDeInfo other) { String other_element_key = other_element.getKey(); String other_element_value = other_element.getValue(); - String __this__parameters_copy_key = other_element_key; + String __this__parameters_copy_key = org.apache.hive.common.util.HiveStringUtils.intern(other_element_key); - String __this__parameters_copy_value = other_element_value; + String __this__parameters_copy_value = org.apache.hive.common.util.HiveStringUtils.intern(other_element_value); __this__parameters.put(__this__parameters_copy_key, __this__parameters_copy_value); } @@ -185,7 +185,7 @@ public String getName() { } public void setName(String name) { - this.name = name; + this.name = org.apache.hive.common.util.HiveStringUtils.intern(name); } public void unsetName() { @@ -208,7 +208,7 @@ public String getSerializationLib() { } public void setSerializationLib(String 
serializationLib) { - this.serializationLib = serializationLib; + this.serializationLib = org.apache.hive.common.util.HiveStringUtils.intern(serializationLib); } public void unsetSerializationLib() { @@ -242,7 +242,7 @@ public void putToParameters(String key, String val) { } public void setParameters(Map parameters) { - this.parameters = parameters; + this.parameters = org.apache.hive.common.util.HiveStringUtils.intern(parameters); } public void unsetParameters() { diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java index d0b9843..b91cc1c 100644 --- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java +++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java @@ -216,17 +216,17 @@ public StorageDescriptor( { this(); this.cols = cols; - this.location = location; - this.inputFormat = inputFormat; - this.outputFormat = outputFormat; + this.location = org.apache.hive.common.util.HiveStringUtils.intern(location); + this.inputFormat = org.apache.hive.common.util.HiveStringUtils.intern(inputFormat); + this.outputFormat = org.apache.hive.common.util.HiveStringUtils.intern(outputFormat); this.compressed = compressed; setCompressedIsSet(true); this.numBuckets = numBuckets; setNumBucketsIsSet(true); this.serdeInfo = serdeInfo; - this.bucketCols = bucketCols; + this.bucketCols = org.apache.hive.common.util.HiveStringUtils.intern(bucketCols); this.sortCols = sortCols; - this.parameters = parameters; + this.parameters = org.apache.hive.common.util.HiveStringUtils.intern(parameters); } /** @@ -242,13 +242,13 @@ public StorageDescriptor(StorageDescriptor other) { this.cols = __this__cols; } if (other.isSetLocation()) { - this.location = other.location; + this.location = org.apache.hive.common.util.HiveStringUtils.intern(other.location); } if (other.isSetInputFormat()) { - this.inputFormat = other.inputFormat; + this.inputFormat = org.apache.hive.common.util.HiveStringUtils.intern(other.inputFormat); } if (other.isSetOutputFormat()) { - this.outputFormat = other.outputFormat; + this.outputFormat = org.apache.hive.common.util.HiveStringUtils.intern(other.outputFormat); } this.compressed = other.compressed; this.numBuckets = other.numBuckets; @@ -276,9 +276,9 @@ public StorageDescriptor(StorageDescriptor other) { String other_element_key = other_element.getKey(); String other_element_value = other_element.getValue(); - String __this__parameters_copy_key = other_element_key; + String __this__parameters_copy_key = org.apache.hive.common.util.HiveStringUtils.intern(other_element_key); - String __this__parameters_copy_value = other_element_value; + String __this__parameters_copy_value = org.apache.hive.common.util.HiveStringUtils.intern(other_element_value); __this__parameters.put(__this__parameters_copy_key, __this__parameters_copy_value); } @@ -356,7 +356,7 @@ public String getLocation() { } public void setLocation(String location) { - this.location = location; + this.location = org.apache.hive.common.util.HiveStringUtils.intern(location); } public void unsetLocation() { @@ -379,7 +379,7 @@ public String getInputFormat() { } public void setInputFormat(String inputFormat) { - this.inputFormat = inputFormat; + this.inputFormat = org.apache.hive.common.util.HiveStringUtils.intern(inputFormat); } public void unsetInputFormat() { @@ -402,7 +402,7 @@ public String 
getOutputFormat() { } public void setOutputFormat(String outputFormat) { - this.outputFormat = outputFormat; + this.outputFormat = org.apache.hive.common.util.HiveStringUtils.intern(outputFormat); } public void unsetOutputFormat() { @@ -507,7 +507,7 @@ public void addToBucketCols(String elem) { } public void setBucketCols(List bucketCols) { - this.bucketCols = bucketCols; + this.bucketCols = org.apache.hive.common.util.HiveStringUtils.intern(bucketCols); } public void unsetBucketCols() { @@ -579,7 +579,7 @@ public void putToParameters(String key, String val) { } public void setParameters(Map parameters) { - this.parameters = parameters; + this.parameters = org.apache.hive.common.util.HiveStringUtils.intern(parameters); } public void unsetParameters() { diff --git a/packaging/pom.xml b/packaging/pom.xml index d11fc1e..ebf34ed 100644 --- a/packaging/pom.xml +++ b/packaging/pom.xml @@ -158,6 +158,12 @@ org.apache.hive + hive-jdbc + ${project.version} + ${hive.jdbc.driver.classifier} + + + org.apache.hive hive-beeline ${project.version} diff --git a/pom.xml b/pom.xml index de99e25..c6f2382 100644 --- a/pom.xml +++ b/pom.xml @@ -105,7 +105,7 @@ 1.1 3.0.1 2.4 - 2.4 + 2.6 3.1 1.1.3 1.5.4 @@ -137,6 +137,7 @@ 0.9.0 0.9.0 1.2.16 + 2.3 1.9.5 2.0.0-M5 @@ -576,6 +581,7 @@ com.twitter:parquet-hadoop-bundle org.apache.thrift:libthrift commons-lang:commons-lang + org.apache.commons:commons-lang3 org.jodd:jodd-core org.json:json org.apache.avro:avro @@ -593,6 +599,7 @@ org.codehaus.jackson:jackson-core-asl org.codehaus.jackson:jackson-mapper-asl + net.sf.opencsv:opencsv diff --git a/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp b/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp index b9e04e2..04a0d67 100644 --- a/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp +++ b/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp @@ -52,7 +52,9 @@ int _kOperatorTypeValues[] = { OperatorType::PTF, OperatorType::MUX, OperatorType::DEMUX, - OperatorType::EVENT + OperatorType::EVENT, + OperatorType::ORCFILEMERGE, + OperatorType::RCFILEMERGE }; const char* _kOperatorTypeNames[] = { "JOIN", @@ -76,9 +78,11 @@ const char* _kOperatorTypeNames[] = { "PTF", "MUX", "DEMUX", - "EVENT" + "EVENT", + "ORCFILEMERGE", + "RCFILEMERGE" }; -const std::map _OperatorType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(22, _kOperatorTypeValues, _kOperatorTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); +const std::map _OperatorType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(24, _kOperatorTypeValues, _kOperatorTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); int _kTaskTypeValues[] = { TaskType::MAP, diff --git a/ql/src/gen/thrift/gen-cpp/queryplan_types.h b/ql/src/gen/thrift/gen-cpp/queryplan_types.h index 30ef711..d7797c6 100644 --- a/ql/src/gen/thrift/gen-cpp/queryplan_types.h +++ b/ql/src/gen/thrift/gen-cpp/queryplan_types.h @@ -57,7 +57,9 @@ struct OperatorType { PTF = 18, MUX = 19, DEMUX = 20, - EVENT = 21 + EVENT = 21, + ORCFILEMERGE = 22, + RCFILEMERGE = 23 }; }; diff --git a/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java b/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java index 4e735f7..e5c4c44 100644 --- a/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java +++ b/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java @@ -7,6 +7,10 @@ package org.apache.hadoop.hive.ql.plan.api; +import java.util.Map; +import java.util.HashMap; +import org.apache.thrift.TEnum; + public enum OperatorType 
implements org.apache.thrift.TEnum { JOIN(0), MAPJOIN(1), diff --git a/ql/src/gen/thrift/gen-php/Types.php b/ql/src/gen/thrift/gen-php/Types.php index 4ed7fcc..e6f87d3 100644 --- a/ql/src/gen/thrift/gen-php/Types.php +++ b/ql/src/gen/thrift/gen-php/Types.php @@ -57,6 +57,8 @@ final class OperatorType { const MUX = 19; const DEMUX = 20; const EVENT = 21; + const ORCFILEMERGE = 22; + const RCFILEMERGE = 23; static public $__names = array( 0 => 'JOIN', 1 => 'MAPJOIN', @@ -80,6 +82,8 @@ final class OperatorType { 19 => 'MUX', 20 => 'DEMUX', 21 => 'EVENT', + 22 => 'ORCFILEMERGE', + 23 => 'RCFILEMERGE', ); } diff --git a/ql/src/gen/thrift/gen-py/queryplan/ttypes.py b/ql/src/gen/thrift/gen-py/queryplan/ttypes.py index 96777fa..2e26e58 100644 --- a/ql/src/gen/thrift/gen-py/queryplan/ttypes.py +++ b/ql/src/gen/thrift/gen-py/queryplan/ttypes.py @@ -67,6 +67,8 @@ class OperatorType: MUX = 19 DEMUX = 20 EVENT = 21 + ORCFILEMERGE = 22 + RCFILEMERGE = 23 _VALUES_TO_NAMES = { 0: "JOIN", @@ -91,6 +93,8 @@ class OperatorType: 19: "MUX", 20: "DEMUX", 21: "EVENT", + 22: "ORCFILEMERGE", + 23: "RCFILEMERGE", } _NAMES_TO_VALUES = { @@ -116,6 +120,8 @@ class OperatorType: "MUX": 19, "DEMUX": 20, "EVENT": 21, + "ORCFILEMERGE": 22, + "RCFILEMERGE": 23, } class TaskType: diff --git a/ql/src/gen/thrift/gen-rb/queryplan_types.rb b/ql/src/gen/thrift/gen-rb/queryplan_types.rb index 449becf..e5e98ae 100644 --- a/ql/src/gen/thrift/gen-rb/queryplan_types.rb +++ b/ql/src/gen/thrift/gen-rb/queryplan_types.rb @@ -43,8 +43,10 @@ module OperatorType MUX = 19 DEMUX = 20 EVENT = 21 - VALUE_MAP = {0 => "JOIN", 1 => "MAPJOIN", 2 => "EXTRACT", 3 => "FILTER", 4 => "FORWARD", 5 => "GROUPBY", 6 => "LIMIT", 7 => "SCRIPT", 8 => "SELECT", 9 => "TABLESCAN", 10 => "FILESINK", 11 => "REDUCESINK", 12 => "UNION", 13 => "UDTF", 14 => "LATERALVIEWJOIN", 15 => "LATERALVIEWFORWARD", 16 => "HASHTABLESINK", 17 => "HASHTABLEDUMMY", 18 => "PTF", 19 => "MUX", 20 => "DEMUX", 21 => "EVENT"} - VALID_VALUES = Set.new([JOIN, MAPJOIN, EXTRACT, FILTER, FORWARD, GROUPBY, LIMIT, SCRIPT, SELECT, TABLESCAN, FILESINK, REDUCESINK, UNION, UDTF, LATERALVIEWJOIN, LATERALVIEWFORWARD, HASHTABLESINK, HASHTABLEDUMMY, PTF, MUX, DEMUX, EVENT]).freeze + ORCFILEMERGE = 22 + RCFILEMERGE = 23 + VALUE_MAP = {0 => "JOIN", 1 => "MAPJOIN", 2 => "EXTRACT", 3 => "FILTER", 4 => "FORWARD", 5 => "GROUPBY", 6 => "LIMIT", 7 => "SCRIPT", 8 => "SELECT", 9 => "TABLESCAN", 10 => "FILESINK", 11 => "REDUCESINK", 12 => "UNION", 13 => "UDTF", 14 => "LATERALVIEWJOIN", 15 => "LATERALVIEWFORWARD", 16 => "HASHTABLESINK", 17 => "HASHTABLEDUMMY", 18 => "PTF", 19 => "MUX", 20 => "DEMUX", 21 => "EVENT", 22 => "ORCFILEMERGE", 23 => "RCFILEMERGE"} + VALID_VALUES = Set.new([JOIN, MAPJOIN, EXTRACT, FILTER, FORWARD, GROUPBY, LIMIT, SCRIPT, SELECT, TABLESCAN, FILESINK, REDUCESINK, UNION, UDTF, LATERALVIEWJOIN, LATERALVIEWFORWARD, HASHTABLESINK, HASHTABLEDUMMY, PTF, MUX, DEMUX, EVENT, ORCFILEMERGE, RCFILEMERGE]).freeze end module TaskType diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java index d8b0516..44d3c46 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -503,9 +503,11 @@ public static void doAuthorization(BaseSemanticAnalyzer sem, String command) // get mapping of tables to columns used ColumnAccessInfo colAccessInfo = sem.getColumnAccessInfo(); // colAccessInfo is set only in case of SemanticAnalyzer - Map> tab2Cols = colAccessInfo != null ? 
colAccessInfo + Map> selectTab2Cols = colAccessInfo != null ? colAccessInfo .getTableToColumnAccessMap() : null; - doAuthorizationV2(ss, op, inputs, outputs, command, tab2Cols); + Map> updateTab2Cols = sem.getUpdateColumnAccessInfo() != null ? + sem.getUpdateColumnAccessInfo().getTableToColumnAccessMap() : null; + doAuthorizationV2(ss, op, inputs, outputs, command, selectTab2Cols, updateTab2Cols); return; } if (op == null) { @@ -696,7 +698,13 @@ private static void getTablePartitionUsedColumns(HiveOperation op, BaseSemanticA } private static void doAuthorizationV2(SessionState ss, HiveOperation op, HashSet inputs, - HashSet outputs, String command, Map> tab2cols) throws HiveException { + HashSet outputs, String command, Map> tab2cols, + Map> updateTab2Cols) throws HiveException { + + /* comment for reviewers -> updateTab2Cols needed to be separate from tab2cols because if I + pass tab2cols to getHivePrivObjects for the output case it will trip up insert/selects, + since the insert will get passed the columns from the select. + */ HiveAuthzContext.Builder authzContextBuilder = new HiveAuthzContext.Builder(); authzContextBuilder.setUserIpAddress(ss.getUserIpAddress()); @@ -704,7 +712,7 @@ private static void doAuthorizationV2(SessionState ss, HiveOperation op, HashSet HiveOperationType hiveOpType = getHiveOperationType(op); List inputsHObjs = getHivePrivObjects(inputs, tab2cols); - List outputHObjs = getHivePrivObjects(outputs, null); + List outputHObjs = getHivePrivObjects(outputs, updateTab2Cols); ss.getAuthorizerV2().checkPrivileges(hiveOpType, inputsHObjs, outputHObjs, authzContextBuilder.build()); } @@ -730,12 +738,6 @@ private static void doAuthorizationV2(SessionState ss, HiveOperation op, HashSet //do not authorize temporary uris continue; } - if (privObject instanceof ReadEntity && ((ReadEntity)privObject).isUpdateOrDelete()) { - // Skip this one, as we don't want to check select privileges for the table we're reading - // for an update or delete. - continue; - } - //support for authorization on partitions needs to be added String dbname = null; String objName = null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index b3389cc..463e8fb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -84,7 +84,8 @@ INVALID_PATH(10027, "Invalid path"), ILLEGAL_PATH(10028, "Path is not legal"), INVALID_NUMERICAL_CONSTANT(10029, "Invalid numerical constant"), - INVALID_ARRAYINDEX_CONSTANT(10030, "Non-constant expressions for array indexes not supported"), + INVALID_ARRAYINDEX_TYPE(10030, + "Not proper type for index of ARRAY. 
Currently, only integer type is supported"), INVALID_MAPINDEX_CONSTANT(10031, "Non-constant expression for map indexes not supported"), INVALID_MAPINDEX_TYPE(10032, "MAP key type does not match index expression type"), NON_COLLECTION_TYPE(10033, "[] not valid on non-collection types"), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java index 89fff81..b0de749 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java @@ -27,6 +27,7 @@ import java.util.List; import java.util.Map; +import org.apache.commons.lang3.StringEscapeUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -34,10 +35,9 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; -import org.apache.hadoop.hive.ql.exec.FooterBuffer; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.io.HiveRecordReader; @@ -48,7 +48,6 @@ import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; -import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; @@ -60,11 +59,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapred.InputFormat; @@ -82,6 +78,9 @@ static Log LOG = LogFactory.getLog(FetchOperator.class.getName()); static LogHelper console = new LogHelper(LOG); + public static final String FETCH_OPERATOR_DIRECTORY_LIST = + "hive.complete.dir.list"; + private boolean isNativeTable; private FetchWork work; protected Operator operator; // operator tree for processing row further (option) @@ -352,6 +351,7 @@ private void getNextPath() throws Exception { } return; } else { + setFetchOperatorContext(job, work.getPartDir()); iterPath = work.getPartDir().iterator(); iterPartDesc = work.getPartDesc().iterator(); } @@ -380,6 +380,30 @@ private void getNextPath() throws Exception { } /** + * Set context for this fetch operator in to the jobconf. + * This helps InputFormats make decisions based on the scope of the complete + * operation. 
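As a rough illustration of the consumer side (hypothetical code, not part of this patch): an InputFormat that wants the "scope of the complete operation" described above could unpack the tab-separated, Java-escaped directory list that setFetchOperatorContext (below) stores under hive.complete.dir.list. Only the key name, the tab separator, and the escapeJava encoding are taken from the patch; the class and method names here are made up for the sketch.

    import java.util.ArrayList;
    import java.util.List;
    import org.apache.commons.lang3.StringEscapeUtils;
    import org.apache.hadoop.mapred.JobConf;

    public class FetchScopeExample {
      // Reads back the directory list written by FetchOperator.setFetchOperatorContext.
      public static List<String> completeDirList(JobConf job) {
        List<String> dirs = new ArrayList<String>();
        String packed = job.get("hive.complete.dir.list"); // FETCH_OPERATOR_DIRECTORY_LIST
        if (packed != null && !packed.isEmpty()) {
          for (String escaped : packed.split("\t")) {
            dirs.add(StringEscapeUtils.unescapeJava(escaped)); // values were escaped with escapeJava
          }
        }
        return dirs;
      }
    }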
+ * @param conf the configuration to modify + * @param partDirs the list of partition directories + */ + static void setFetchOperatorContext(JobConf conf, + ArrayList partDirs) { + if (partDirs != null) { + StringBuilder buff = new StringBuilder(); + boolean first = true; + for(Path p: partDirs) { + if (first) { + first = false; + } else { + buff.append('\t'); + } + buff.append(StringEscapeUtils.escapeJava(p.toString())); + } + conf.set(FETCH_OPERATOR_DIRECTORY_LIST, buff.toString()); + } + } + + /** * A cache of Object Inspector Settable Properties. */ private static Map oiSettableProperties = new HashMap(); @@ -748,7 +772,8 @@ public ObjectInspector getOutputObjectInspector() throws HiveException { */ private FileStatus[] listStatusUnderPath(FileSystem fs, Path p) throws IOException { boolean recursive = HiveConf.getBoolVar(job, HiveConf.ConfVars.HADOOPMAPREDINPUTDIRRECURSIVE); - if (!recursive) { + // If this is in acid format always read it recursively regardless of what the jobconf says. + if (!recursive && !AcidUtils.isAcid(p, job)) { return fs.listStatus(p); } List results = new ArrayList(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index c503bbb..80b7420 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -903,15 +903,15 @@ public static TypeInfo getCommonClassForUnionAll(TypeInfo a, TypeInfo b) { (PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b,PrimitiveCategory.STRING); } - if (FunctionRegistry.implicitConvertable(a, b)) { + if (FunctionRegistry.implicitConvertible(a, b)) { return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcB); } - if (FunctionRegistry.implicitConvertable(b, a)) { + if (FunctionRegistry.implicitConvertible(b, a)) { return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcA); } for (PrimitiveCategory t : numericTypeList) { - if (FunctionRegistry.implicitConvertable(pcA, t) - && FunctionRegistry.implicitConvertable(pcB, t)) { + if (FunctionRegistry.implicitConvertible(pcA, t) + && FunctionRegistry.implicitConvertible(pcB, t)) { return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, t); } } @@ -955,8 +955,8 @@ public static TypeInfo getCommonClassForComparison(TypeInfo a, TypeInfo b) { } for (PrimitiveCategory t : numericTypeList) { - if (FunctionRegistry.implicitConvertable(pcA, t) - && FunctionRegistry.implicitConvertable(pcB, t)) { + if (FunctionRegistry.implicitConvertible(pcA, t) + && FunctionRegistry.implicitConvertible(pcB, t)) { return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, t); } } @@ -1007,7 +1007,7 @@ public static TypeInfo getCommonClass(TypeInfo a, TypeInfo b) { return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, commonCat); } - public static boolean implicitConvertable(PrimitiveCategory from, PrimitiveCategory to) { + public static boolean implicitConvertible(PrimitiveCategory from, PrimitiveCategory to) { if (from == to) { return true; } @@ -1058,7 +1058,7 @@ public static boolean implicitConvertable(PrimitiveCategory from, PrimitiveCateg * Returns whether it is possible to implicitly convert an object of Class * from to Class to. 
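A small usage sketch for the renamed helper (hypothetical example, not from the patch); it relies only on the public signature shown in the surrounding hunks and on Hive's usual widening rules, so treat the expected outputs as an assumption rather than a test.

    import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
    import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;

    public class ImplicitConversionExample {
      public static void main(String[] args) {
        // Widening int -> bigint is implicit; the narrowing direction is not.
        System.out.println(FunctionRegistry.implicitConvertible(
            PrimitiveCategory.INT, PrimitiveCategory.LONG));   // expected: true
        System.out.println(FunctionRegistry.implicitConvertible(
            PrimitiveCategory.LONG, PrimitiveCategory.INT));   // expected: false
      }
    }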
*/ - public static boolean implicitConvertable(TypeInfo from, TypeInfo to) { + public static boolean implicitConvertible(TypeInfo from, TypeInfo to) { if (from.equals(to)) { return true; } @@ -1067,9 +1067,9 @@ public static boolean implicitConvertable(TypeInfo from, TypeInfo to) { // 2 TypeInfos from the same qualified type (varchar, decimal) should still be // seen as equivalent. if (from.getCategory() == Category.PRIMITIVE && to.getCategory() == Category.PRIMITIVE) { - return implicitConvertable( - ((PrimitiveTypeInfo)from).getPrimitiveCategory(), - ((PrimitiveTypeInfo)to).getPrimitiveCategory()); + return implicitConvertible( + ((PrimitiveTypeInfo) from).getPrimitiveCategory(), + ((PrimitiveTypeInfo) to).getPrimitiveCategory()); } return false; } @@ -1305,7 +1305,7 @@ public static int matchCost(TypeInfo argumentPassed, // but there is a conversion cost. return 1; } - if (!exact && implicitConvertable(argumentPassed, argumentAccepted)) { + if (!exact && implicitConvertible(argumentPassed, argumentAccepted)) { return 1; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java index 487bb33..2c9e81f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java @@ -697,6 +697,7 @@ public MergeQueue(String alias, FetchWork fetchWork, JobConf jobConf, // But if hive supports assigning bucket number for each partition, this can be vary public void setupContext(List paths) throws HiveException { int segmentLen = paths.size(); + FetchOperator.setFetchOperatorContext(jobConf, fetchWork.getPartDir()); FetchOperator[] segments = segmentsForSize(segmentLen); for (int i = 0 ; i < segmentLen; i++) { Path path = paths.get(i); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index f2f37b2..db64193 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -27,6 +27,7 @@ import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.WordUtils; +import org.apache.commons.lang3.StringEscapeUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -2283,13 +2284,15 @@ public static int getDefaultNotificationInterval(Configuration hconf) { * configuration which receives configured properties */ public static void copyTableJobPropertiesToConf(TableDesc tbl, JobConf job) { - String bucketString = tbl.getProperties() - .getProperty(hive_metastoreConstants.BUCKET_COUNT); - // copy the bucket count - if (bucketString != null) { - job.set(hive_metastoreConstants.BUCKET_COUNT, bucketString); + Properties tblProperties = tbl.getProperties(); + for(String name: tblProperties.stringPropertyNames()) { + if (job.get(name) == null) { + String val = (String) tblProperties.get(name); + if (val != null) { + job.set(name, StringEscapeUtils.escapeJava(val)); + } + } } - Map jobProperties = tbl.getJobProperties(); if (jobProperties == null) { return; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index b7d40c6..ac4b5a1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java 
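The DagUtils hunk that follows makes localizeResource skip the copy when the resource is already present at the destination, by consulting a checkPreExisting(src, dest, conf) helper that this excerpt calls but does not show. A minimal sketch of such a check, under the assumption that matching existence and file length is enough; the real helper may also compare timestamps or checksums.

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class LocalizeCheckSketch {
      // True when dest already holds a copy of src, so localization can be skipped.
      static boolean checkPreExisting(Path src, Path dest, Configuration conf) throws IOException {
        FileSystem destFS = dest.getFileSystem(conf);
        if (!destFS.exists(dest)) {
          return false;
        }
        FileStatus destStatus = destFS.getFileStatus(dest);
        FileStatus srcStatus = src.getFileSystem(conf).getFileStatus(src);
        return destStatus.getLen() == srcStatus.getLen();
      }
    }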
@@ -850,7 +850,7 @@ public LocalResource localizeResource(Path src, Path dest, Configuration conf) throws IOException { FileSystem destFS = dest.getFileSystem(conf); - if (src != null) { + if (src != null && checkPreExisting(src, dest, conf) == false) { // copy the src to the destination and create local resource. // do not overwrite. LOG.info("Localizing resource because it does not exist: " + src + " to dest: " + dest); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java index 54ac686..29f6bfa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java @@ -141,8 +141,7 @@ public int monitorExecution(final DAGClient dagClient, HiveTxnManager txnMgr, case RUNNING: if (!running) { perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING); - console.printInfo("Status: Running (application id: " - +dagClient.getExecutionContext()+")\n"); + console.printInfo("Status: Running (" + dagClient.getExecutionContext() + ")\n"); for (String s: progressMap.keySet()) { perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_RUN_VERTEX + s); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index acee377..de33830 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -48,6 +48,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFAvgDecimal; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCount; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountMerge; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFSumDecimal; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDouble; @@ -1898,7 +1899,7 @@ static String getUndecoratedName(String hiveTypeName) { add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMaxDecimal.class)); add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.NONE, GroupByDesc.Mode.HASH, VectorUDAFCountStar.class)); add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); - add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.MERGEPARTIAL, VectorUDAFSumLong.class)); + add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.MERGEPARTIAL, VectorUDAFCountMerge.class)); add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java index 7a8b8bf..c037ea8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java @@ -660,7 +660,7 @@ public VectorExpressionWriter init(SettableStringObjectInspector objInspector) @Override public Object writeValue(byte[] value, int start, int length) throws HiveException { this.text.set(value, start, length); - ((SettableStringObjectInspector) this.objectInspector).set(this.obj, this.text.toString()); + ((SettableStringObjectInspector) this.objectInspector).set(this.obj, this.text); return this.obj; } @@ -671,7 +671,7 @@ public Object setValue(Object field, byte[] value, int start, int length) field = initValue(null); } this.text.set(value, start, length); - ((SettableStringObjectInspector) this.objectInspector).set(field, this.text.toString()); + ((SettableStringObjectInspector) this.objectInspector).set(field, this.text); return field; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java index 5aa4d4c..4e43905 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java @@ -46,15 +46,7 @@ private static final long serialVersionUID = 1L; - transient private long value; - transient private boolean isNull; - - public void initIfNull() { - if (isNull) { - isNull = false; - value = 0; - } - } + transient private long count; @Override public int getVariableSize() { @@ -63,8 +55,7 @@ public int getVariableSize() { @Override public void reset() { - isNull = true; - value = 0L; + count = 0L; } } @@ -131,8 +122,7 @@ private void iterateNoNullsWithAggregationSelection( aggregationBufferSets, aggregateIndex, i); - myagg.initIfNull(); - myagg.value++; + myagg.count++; } } @@ -148,8 +138,7 @@ private void iterateHasNullsWithAggregationSelection( aggregationBufferSets, aggregateIndex, i); - myagg.initIfNull(); - myagg.value++; + myagg.count++; } } } @@ -168,8 +157,7 @@ private void iterateHasNullsSelectionWithAggregationSelection( aggregationBufferSets, aggregateIndex, j); - myagg.initIfNull(); - myagg.value++; + myagg.count++; } } } @@ -191,17 +179,15 @@ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) Aggregation myagg = (Aggregation)agg; - myagg.initIfNull(); - if (inputVector.isRepeating) { if (inputVector.noNulls || !inputVector.isNull[0]) { - myagg.value += batchSize; + myagg.count += batchSize; } return; } if (inputVector.noNulls) { - myagg.value += batchSize; + myagg.count += batchSize; return; } else if (!batch.selectedInUse) { @@ -221,7 +207,7 @@ private void iterateSelectionHasNulls( for (int j=0; j< batchSize; ++j) { int i = selected[j]; if (!isNull[i]) { - myagg.value += 1; + myagg.count += 1; } } } @@ -233,7 +219,7 @@ private void iterateNoSelectionHasNulls( for (int i=0; i< batchSize; ++i) { if (!isNull[i]) { - myagg.value += 1; + myagg.count += 1; } } } @@ -251,14 +237,9 @@ public void reset(AggregationBuffer agg) throws HiveException { @Override public Object evaluateOutput(AggregationBuffer agg) throws HiveException { - Aggregation myagg = (Aggregation) agg; - if (myagg.isNull) { - return null; - } - else { - 
result.set (myagg.value); + Aggregation myagg = (Aggregation) agg; + result.set (myagg.count); return result; - } } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java new file mode 100644 index 0000000..7dabbd8 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java @@ -0,0 +1,400 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.LongWritable; + + +/** + * VectorUDAFCountMerge. Vectorized implementation for COUNT aggregate on reduce-side (merge). + */ +@Description(name = "count", value = "_FUNC_(expr) - Returns the merged sum value of expr (vectorized, type: long)") + +public class VectorUDAFCountMerge extends VectorAggregateExpression { + + private static final long serialVersionUID = 1L; + + /** + * class for storing the current aggregate value. 
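To make the intent of this new class concrete: the VectorUDAFCount hunks above stop tracking isNull and simply count rows, and VectorizationContext now maps COUNT in MERGEPARTIAL mode to this class, which sums the partial counts rather than treating them as a SUM over nullable longs. A toy illustration of the two phases (plain Java, not patch code; the sample values are invented):

    public class CountPhasesSketch {
      public static void main(String[] args) {
        Long[] mapSideColumn = {7L, null, 9L, null, 4L};
        long partial = 0L;
        for (Long v : mapSideColumn) {
          if (v != null) {
            partial++;              // map side (VectorUDAFCount): add 1 per non-null value
          }
        }
        long[] partialsFromMappers = {partial, 0L, 5L};
        long merged = 0L;
        for (long p : partialsFromMappers) {
          merged += p;              // merge side (VectorUDAFCountMerge): sum the partial counts
        }
        System.out.println(merged); // 8
      }
    }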
+ */ + static class Aggregation implements AggregationBuffer { + + private static final long serialVersionUID = 1L; + + transient private long value; + + @Override + public int getVariableSize() { + throw new UnsupportedOperationException(); + } + + @Override + public void reset() { + value = 0L; + } + } + + private VectorExpression inputExpression = null; + transient private final LongWritable result; + + public VectorUDAFCountMerge(VectorExpression inputExpression) { + this(); + this.inputExpression = inputExpression; + } + + public VectorUDAFCountMerge() { + super(); + result = new LongWritable(0); + } + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex); + return myagg; + } + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + VectorizedRowBatch batch) throws HiveException { + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + inputExpression.evaluate(batch); + + LongColumnVector inputVector = (LongColumnVector)batch. + cols[this.inputExpression.getOutputColumn()]; + long[] vector = inputVector.vector; + + if (inputVector.noNulls) { + if (inputVector.isRepeating) { + iterateNoNullsRepeatingWithAggregationSelection( + aggregationBufferSets, aggregateIndex, + vector[0], batchSize); + } else { + if (batch.selectedInUse) { + iterateNoNullsSelectionWithAggregationSelection( + aggregationBufferSets, aggregateIndex, + vector, batch.selected, batchSize); + } else { + iterateNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, + vector, batchSize); + } + } + } else { + if (inputVector.isRepeating) { + if (batch.selectedInUse) { + iterateHasNullsRepeatingSelectionWithAggregationSelection( + aggregationBufferSets, aggregateIndex, + vector[0], batchSize, batch.selected, inputVector.isNull); + } else { + iterateHasNullsRepeatingWithAggregationSelection( + aggregationBufferSets, aggregateIndex, + vector[0], batchSize, inputVector.isNull); + } + } else { + if (batch.selectedInUse) { + iterateHasNullsSelectionWithAggregationSelection( + aggregationBufferSets, aggregateIndex, + vector, batchSize, batch.selected, inputVector.isNull); + } else { + iterateHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, + vector, batchSize, inputVector.isNull); + } + } + } + } + + private void iterateNoNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + long value, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + myagg.value += value; + } + } + + private void iterateNoNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + long[] values, + int[] selection, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + myagg.value += values[selection[i]]; + } + } + + private void iterateNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + long[] values, + int batchSize) { + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + 
aggregationBufferSets, + aggregateIndex, + i); + myagg.value += values[i]; + } + } + + private void iterateHasNullsRepeatingSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + long value, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[selection[i]]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + myagg.value += value; + } + } + + } + + private void iterateHasNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + long value, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + myagg.value += value; + } + } + } + + private void iterateHasNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + long[] values, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int j=0; j < batchSize; ++j) { + int i = selection[j]; + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + j); + myagg.value += values[i]; + } + } + } + + private void iterateHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + long[] values, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + myagg.value += values[i]; + } + } + } + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + LongColumnVector inputVector = (LongColumnVector)batch. + cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + long[] vector = inputVector.vector; + + if (inputVector.isRepeating) { + if (inputVector.noNulls) { + myagg.value += vector[0]*batchSize; + } + return; + } + + if (!batch.selectedInUse && inputVector.noNulls) { + iterateNoSelectionNoNulls(myagg, vector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull); + } + else if (inputVector.noNulls){ + iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected); + } + } + + private void iterateSelectionHasNulls( + Aggregation myagg, + long[] vector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + int i = selected[j]; + if (!isNull[i]) { + myagg.value += vector[i]; + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + long[] vector, + int batchSize, + int[] selected) { + + for (int i=0; i< batchSize; ++i) { + myagg.value += vector[selected[i]]; + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + long[] vector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i paths = Utilities.getInputPathsTez(job, mrwork); - dirs = paths.toArray(new Path[paths.size()]); - if (dirs.length == 0) { - // if we still don't have any files it's time to fail. 
- throw new IOException("No input paths specified in job"); - } - } catch (Exception e) { - throw new IOException("Could not create input paths", e); - } - } else { - throw new IOException("No input paths specified in job"); - } - } + Path[] dirs = getInputPaths(job); JobConf newjob = new JobConf(job); ArrayList result = new ArrayList(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java index bf44548..35db50c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java @@ -33,6 +33,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -264,8 +265,8 @@ public int hashCode() { /** * Create Hive splits based on CombineFileSplit. */ - @Override - public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { + private InputSplit[] getCombineSplits(JobConf job, + int numSplits) throws IOException { PerfLogger perfLogger = PerfLogger.getPerfLogger(); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS); init(job); @@ -274,17 +275,6 @@ public int hashCode() { mrwork.getAliasToWork(); CombineFileInputFormatShim combine = ShimLoader.getHadoopShims() .getCombineFileInputFormat(); - - // on tez we're avoiding duplicating path info since the info will go over - // rpc - if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) { - try { - List dirs = Utilities.getInputPathsTez(job, mrwork); - Utilities.setInputPaths(job, dirs); - } catch (Exception e) { - throw new IOException("Could not create input paths", e); - } - } InputSplit[] splits = null; if (combine == null) { @@ -327,13 +317,6 @@ public int hashCode() { // ignore } FileSystem inpFs = path.getFileSystem(job); - if (inputFormatClass.isAssignableFrom(OrcInputFormat.class)) { - if (inpFs.exists(new Path(path, OrcRecordUpdater.ACID_FORMAT))) { - throw new IOException("CombineHiveInputFormat is incompatible " + - " with ACID tables. Please set hive.input.format=" + - "org.apache.hadoop.hive.ql.io.HiveInputFormat"); - } - } // Since there is no easy way of knowing whether MAPREDUCE-1597 is present in the tree or not, // we use a configuration variable for the same @@ -461,6 +444,84 @@ public int hashCode() { return result.toArray(new CombineHiveInputSplit[result.size()]); } + /** + * Create Hive splits based on CombineFileSplit. 
+ */ + @Override + public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { + init(job); + Map> pathToAliases = mrwork.getPathToAliases(); + Map> aliasToWork = + mrwork.getAliasToWork(); + + ArrayList result = new ArrayList(); + + Path[] paths = getInputPaths(job); + + List nonCombinablePaths = new ArrayList(paths.length / 2); + List combinablePaths = new ArrayList(paths.length / 2); + + for (Path path : paths) { + + PartitionDesc part = + HiveFileFormatUtils.getPartitionDescFromPathRecursively( + pathToPartitionInfo, path, + IOPrepareCache.get().allocatePartitionDescMap()); + + // Use HiveInputFormat if any of the paths is not splittable + Class inputFormatClass = part.getInputFileFormatClass(); + String inputFormatClassName = inputFormatClass.getName(); + InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job); + if (inputFormat instanceof AvoidSplitCombination && + ((AvoidSplitCombination) inputFormat).shouldSkipCombine(path, job)) { + if (LOG.isDebugEnabled()) { + LOG.debug("The split [" + path + + "] is being parked for HiveInputFormat.getSplits"); + } + nonCombinablePaths.add(path); + } else { + combinablePaths.add(path); + } + } + + // Store the previous value for the path specification + String oldPaths = job.get(HiveConf.ConfVars.HADOOPMAPREDINPUTDIR.varname); + if (LOG.isDebugEnabled()) { + LOG.debug("The received input paths are: [" + oldPaths + + "] against the property " + + HiveConf.ConfVars.HADOOPMAPREDINPUTDIR.varname); + } + + // Process the normal splits + if (nonCombinablePaths.size() > 0) { + FileInputFormat.setInputPaths(job, nonCombinablePaths.toArray + (new Path[nonCombinablePaths.size()])); + InputSplit[] splits = super.getSplits(job, numSplits); + for (InputSplit split : splits) { + result.add(split); + } + } + + // Process the combine splits + if (combinablePaths.size() > 0) { + FileInputFormat.setInputPaths(job, combinablePaths.toArray + (new Path[combinablePaths.size()])); + InputSplit[] splits = getCombineSplits(job, numSplits); + for (InputSplit split : splits) { + result.add(split); + } + } + + // Restore the old path information back + // This is just to prevent incompatibilities with previous versions Hive + // if some application depends on the original value being set. + if (oldPaths != null) { + job.set(HiveConf.ConfVars.HADOOPMAPREDINPUTDIR.varname, oldPaths); + } + LOG.info("Number of all splits " + result.size()); + return result.toArray(new InputSplit[result.size()]); + } + private void processPaths(JobConf job, CombineFileInputFormatShim combine, List iss, Path... 
path) throws IOException { JobConf currJob = new JobConf(job); @@ -635,4 +696,12 @@ public String toString() { return s.toString(); } } + + /** + * This is a marker interface that is used to identify the formats where + * combine split generation is not applicable + */ + public interface AvoidSplitCombination { + boolean shouldSkipCombine(Path path, Configuration conf) throws IOException; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index 5c4459b..8f4aeda 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -295,11 +295,7 @@ private void addSplitsForGroup(List dirs, TableScanOperator tableScan, Job } } - public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { - PerfLogger perfLogger = PerfLogger.getPerfLogger(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS); - init(job); - + Path[] getInputPaths(JobConf job) throws IOException { Path[] dirs = FileInputFormat.getInputPaths(job); if (dirs.length == 0) { // on tez we're avoiding to duplicate the file info in FileInputFormat. @@ -314,6 +310,14 @@ private void addSplitsForGroup(List dirs, TableScanOperator tableScan, Job throw new IOException("No input paths specified in job"); } } + return dirs; + } + + public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { + PerfLogger perfLogger = PerfLogger.getPerfLogger(); + perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS); + init(job); + Path[] dirs = getInputPaths(job); JobConf newjob = new JobConf(job); List result = new ArrayList(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java index 2da490e..5e2d880 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java @@ -21,6 +21,8 @@ import java.nio.ByteBuffer; import java.util.EnumSet; +import javax.annotation.Nullable; + interface CompressionCodec { public enum Modifier { @@ -62,6 +64,6 @@ boolean compress(ByteBuffer in, ByteBuffer out, ByteBuffer overflow * @param modifiers compression modifiers * @return codec for use after optional modification */ - CompressionCodec modify(EnumSet modifiers); + CompressionCodec modify(@Nullable EnumSet modifiers); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 913d3ac..0310fdf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -24,6 +24,8 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.TreeMap; +import java.util.NavigableMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; @@ -46,6 +48,7 @@ import org.apache.hadoop.hive.ql.io.AcidInputFormat; import org.apache.hadoop.hive.ql.io.AcidOutputFormat; import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; import org.apache.hadoop.hive.ql.io.InputFormatChecker; import org.apache.hadoop.hive.ql.io.RecordIdentifier; import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader; @@ -99,7 +102,8 @@ */ public class OrcInputFormat implements InputFormat, 
InputFormatChecker, VectorizedInputFormatInterface, - AcidInputFormat { + AcidInputFormat, + CombineHiveInputFormat.AvoidSplitCombination { private static final Log LOG = LogFactory.getLog(OrcInputFormat.class); static final HadoopShims SHIMS = ShimLoader.getHadoopShims(); @@ -125,6 +129,12 @@ */ private static final double MIN_INCLUDED_LOCATION = 0.80; + @Override + public boolean shouldSkipCombine(Path path, + Configuration conf) throws IOException { + return AcidUtils.isAcid(path, conf); + } + private static class OrcRecordReader implements org.apache.hadoop.mapred.RecordReader, StatsProvidingRecordReader { @@ -610,7 +620,7 @@ private FileInfo verifyCachedFileInfo(FileStatus file) { private final FileSystem fs; private final FileStatus file; private final long blockSize; - private final BlockLocation[] locations; + private final TreeMap locations; private final FileInfo fileInfo; private List stripes; private ReaderImpl.FileMetaInfo fileMetaInfo; @@ -630,7 +640,7 @@ private FileInfo verifyCachedFileInfo(FileStatus file) { this.file = file; this.blockSize = file.getBlockSize(); this.fileInfo = fileInfo; - locations = SHIMS.getLocations(fs, file); + locations = SHIMS.getLocationsWithOffset(fs, file); this.isOriginal = isOriginal; this.deltas = deltas; this.hasBase = hasBase; @@ -641,8 +651,8 @@ Path getPath() { } void schedule() throws IOException { - if(locations.length == 1 && file.getLen() < context.maxSize) { - String[] hosts = locations[0].getHosts(); + if(locations.size() == 1 && file.getLen() < context.maxSize) { + String[] hosts = locations.firstEntry().getValue().getHosts(); synchronized (context.splits) { context.splits.add(new OrcSplit(file.getPath(), 0, file.getLen(), hosts, fileMetaInfo, isOriginal, hasBase, deltas)); @@ -690,15 +700,22 @@ static long getOverlap(long offset1, long length1, void createSplit(long offset, long length, ReaderImpl.FileMetaInfo fileMetaInfo) throws IOException { String[] hosts; - if ((offset % blockSize) + length <= blockSize) { + Map.Entry startEntry = locations.floorEntry(offset); + BlockLocation start = startEntry.getValue(); + if (offset + length <= start.getOffset() + start.getLength()) { // handle the single block case - hosts = locations[(int) (offset / blockSize)].getHosts(); + hosts = start.getHosts(); } else { + Map.Entry endEntry = locations.floorEntry(offset + length); + BlockLocation end = endEntry.getValue(); + //get the submap + NavigableMap navigableMap = locations.subMap(startEntry.getKey(), + true, endEntry.getKey(), true); // Calculate the number of bytes in the split that are local to each // host. 
Map sizes = new HashMap(); long maxSize = 0; - for(BlockLocation block: locations) { + for (BlockLocation block : navigableMap.values()) { long overlap = getOverlap(offset, length, block.getOffset(), block.getLength()); if (overlap > 0) { @@ -711,6 +728,9 @@ void createSplit(long offset, long length, val.set(val.get() + overlap); maxSize = Math.max(maxSize, val.get()); } + } else { + throw new IOException("File " + file.getPath().toString() + + " should have had overlap on block starting at " + block.getOffset()); } } // filter the list of locations to those that have at least 80% of the @@ -718,7 +738,7 @@ void createSplit(long offset, long length, long threshold = (long) (maxSize * MIN_INCLUDED_LOCATION); List hostList = new ArrayList(); // build the locations in a predictable order to simplify testing - for(BlockLocation block: locations) { + for(BlockLocation block: navigableMap.values()) { for(String host: block.getHosts()) { if (sizes.containsKey(host)) { if (sizes.get(host).get() >= threshold) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index 19e7bad..67c19ad 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -77,6 +77,7 @@ class RecordReaderImpl implements RecordReader { private static final Log LOG = LogFactory.getLog(RecordReaderImpl.class); + private static final boolean isLogTraceEnabled = LOG.isTraceEnabled(); private final FSDataInputStream file; private final long firstRow; @@ -3133,9 +3134,9 @@ public Object next(Object previous) throws IOException { // find the next row rowInStripe += 1; advanceToNextRow(rowInStripe + rowBaseInStripe); - if (LOG.isDebugEnabled()) { - LOG.debug("row from " + reader.path); - LOG.debug("orc row = " + result); + if (isLogTraceEnabled) { + LOG.trace("row from " + reader.path); + LOG.trace("orc row = " + result); } return result; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java index ee85a17..620d4d4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java @@ -485,6 +485,7 @@ public OutStream createStream(int column, modifiers = EnumSet.of(Modifier.FASTEST, Modifier.BINARY); break; default: + LOG.warn("Missing ORC compression modifiers for " + kind); modifiers = null; break; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java index a7fd91b..03cc3c5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java @@ -24,6 +24,8 @@ import java.util.zip.Deflater; import java.util.zip.Inflater; +import javax.annotation.Nullable; + import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.HadoopShims.DirectCompressionType; import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim; @@ -130,7 +132,12 @@ public void directDecompress(ByteBuffer in, ByteBuffer out) } @Override - public CompressionCodec modify(EnumSet modifiers) { + public CompressionCodec modify(@Nullable EnumSet modifiers) { + + if (modifiers == null) { + return this; + } + int l = this.level; int s = this.strategy; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java 
b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java index 0919d2f..1334a91 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData; import org.apache.hadoop.hive.ql.metadata.*; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; +import org.apache.hadoop.hive.ql.util.ZooKeeperHiveHelper; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.data.Stat; import org.apache.zookeeper.KeeperException; @@ -73,31 +74,6 @@ public ZooKeeperHiveLockManager() { } /** - * @param conf The hive configuration - * Get the quorum server address from the configuration. The format is: - * host1:port, host2:port.. - **/ - @VisibleForTesting - static String getQuorumServers(HiveConf conf) { - String[] hosts = conf.getVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_QUORUM).split(","); - String port = conf.getVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CLIENT_PORT); - StringBuilder quorum = new StringBuilder(); - for(int i=0; i locks = getLocks(conf, zkpClient, null, parent, false, false); @@ -629,7 +605,8 @@ public static void releaseAllLocks(HiveConf conf) throws Exception { if (fetchData) { try { - data = new HiveLockObjectData(new String(zkpClient.getData(curChild, new DummyWatcher(), null))); + data = new HiveLockObjectData(new String(zkpClient.getData(curChild, + new ZooKeeperHiveHelper.DummyWatcher(), null))); data.setClientIp(clientIp); } catch (Exception e) { LOG.error("Error in getting data for " + curChild, e); @@ -789,11 +766,6 @@ private static HiveLockMode getLockMode(HiveConf conf, String path) { return null; } - public static class DummyWatcher implements Watcher { - public void process(org.apache.zookeeper.WatchedEvent event) { - } - } - @Override public void prepareRetry() throws LockException { try { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java index a3fc7e7..6a43d1c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java @@ -193,11 +193,12 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } SelectOperator selOp = (SelectOperator)tsOp.getChildren().get(0); for(ExprNodeDesc desc : selOp.getConf().getColList()) { - if (!(desc instanceof ExprNodeColumnDesc)) { + if (!((desc instanceof ExprNodeColumnDesc) || (desc instanceof ExprNodeConstantDesc))) { // Probably an expression, cant handle that return null; } } + Map exprMap = selOp.getColumnExprMap(); // Since we have done an exact match on TS-SEL-GBY-RS-GBY-SEL-FS // we need not to do any instanceof checks for following. GroupByOperator gbyOp = (GroupByOperator)selOp.getChildren().get(0); @@ -215,6 +216,12 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return null; } + for(ExprNodeDesc desc : selOp.getConf().getColList()) { + if (!(desc instanceof ExprNodeColumnDesc)) { + // Probably an expression, cant handle that + return null; + } + } FileSinkOperator fsOp = (FileSinkOperator)(selOp.getChildren().get(0)); if (fsOp.getChildOperators() != null && fsOp.getChildOperators().size() > 0) { // looks like a subq plan. 
@@ -236,22 +243,28 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, GenericUDAFResolver udaf = FunctionRegistry.getGenericUDAFResolver(aggr.getGenericUDAFName()); if (udaf instanceof GenericUDAFSum) { - if(!(aggr.getParameters().get(0) instanceof ExprNodeConstantDesc)){ + ExprNodeDesc desc = aggr.getParameters().get(0); + String constant; + if (desc instanceof ExprNodeConstantDesc) { + constant = ((ExprNodeConstantDesc) desc).getValue().toString(); + } else if (desc instanceof ExprNodeColumnDesc && exprMap.get(((ExprNodeColumnDesc)desc).getColumn()) instanceof ExprNodeConstantDesc) { + constant = ((ExprNodeConstantDesc)exprMap.get(((ExprNodeColumnDesc)desc).getColumn())).getValue().toString(); + } else { return null; } Long rowCnt = getRowCnt(pctx, tsOp, tbl); if(rowCnt == null) { return null; } - oneRow.add(HiveDecimal.create(((ExprNodeConstantDesc) aggr.getParameters().get(0)) - .getValue().toString()).multiply(HiveDecimal.create(rowCnt))); + oneRow.add(HiveDecimal.create(constant).multiply(HiveDecimal.create(rowCnt))); ois.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector( PrimitiveCategory.DECIMAL)); } else if (udaf instanceof GenericUDAFCount) { Long rowCnt = 0L; - if ((aggr.getParameters().isEmpty() || aggr.getParameters().get(0) instanceof - ExprNodeConstantDesc)) { + if (aggr.getParameters().isEmpty() || aggr.getParameters().get(0) instanceof + ExprNodeConstantDesc || ((aggr.getParameters().get(0) instanceof ExprNodeColumnDesc) && + exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn()) instanceof ExprNodeConstantDesc)) { // Its either count (*) or count(1) case rowCnt = getRowCnt(pctx, tsOp, tbl); if(rowCnt == null) { @@ -259,12 +272,7 @@ else if (udaf instanceof GenericUDAFCount) { } } else { // Its count(col) case - if (!(aggr.getParameters().get(0) instanceof ExprNodeColumnDesc)) { - // this is weird, we got expr or something in there, bail out - Log.debug("Unexpected expression : " + aggr.getParameters().get(0)); - return null; - } - ExprNodeColumnDesc desc = (ExprNodeColumnDesc)aggr.getParameters().get(0); + ExprNodeColumnDesc desc = (ExprNodeColumnDesc)exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn()); String colName = desc.getColumn(); StatType type = getType(desc.getTypeString()); if(!tbl.isPartitioned()) { @@ -330,7 +338,7 @@ else if (udaf instanceof GenericUDAFCount) { ois.add(PrimitiveObjectInspectorFactory. 
getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG)); } else if (udaf instanceof GenericUDAFMax) { - ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)aggr.getParameters().get(0); + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn()); String colName = colDesc.getColumn(); StatType type = getType(colDesc.getTypeString()); if(!tbl.isPartitioned()) { @@ -419,7 +427,7 @@ else if (udaf instanceof GenericUDAFCount) { } } } else if (udaf instanceof GenericUDAFMin) { - ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)aggr.getParameters().get(0); + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn()); String colName = colDesc.getColumn(); StatType type = getType(colDesc.getTypeString()); if (!tbl.isPartitioned()) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index b2f66e0..4ff9678 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -115,6 +115,10 @@ protected LineageInfo linfo; protected TableAccessInfo tableAccessInfo; protected ColumnAccessInfo columnAccessInfo; + /** + * Columns accessed by updates + */ + protected ColumnAccessInfo updateColumnAccessInfo; public boolean skipAuthorization() { @@ -866,6 +870,14 @@ public void setColumnAccessInfo(ColumnAccessInfo columnAccessInfo) { this.columnAccessInfo = columnAccessInfo; } + public ColumnAccessInfo getUpdateColumnAccessInfo() { + return updateColumnAccessInfo; + } + + public void setUpdateColumnAccessInfo(ColumnAccessInfo updateColumnAccessInfo) { + this.updateColumnAccessInfo = updateColumnAccessInfo; + } + protected LinkedHashMap extractPartitionSpecs(Tree partspec) throws SemanticException { LinkedHashMap partSpec = new LinkedHashMap(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java index a4df8b4..8416cff 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.parse; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; + import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -54,4 +56,21 @@ public void add(String table, String col) { } return mapping; } + + /** + * Strip a virtual column out of the set of columns. This is useful in cases where we do not + * want to be checking against the user reading virtual columns, namely update and delete. 
+ * @param vc + */ + public void stripVirtualColumn(VirtualColumn vc) { + for (Map.Entry> e : tableToColumnAccessMap.entrySet()) { + for (String columnName : e.getValue()) { + if (vc.getName().equalsIgnoreCase(columnName)) { + e.getValue().remove(columnName); + break; + } + } + } + + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index 05cde3e..8c2564f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -1145,7 +1145,10 @@ private void analyzeDropIndex(ASTNode ast) throws SemanticException { } } - inputs.add(new ReadEntity(getTable(tableName))); + Table tbl = getTable(tableName, false); + if (tbl != null) { + inputs.add(new ReadEntity(getTable(tableName))); + } DropIndexDesc dropIdxDesc = new DropIndexDesc(indexName, tableName); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 25cd3a5..714e7d1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -1530,8 +1530,8 @@ principalSpecification principalName @init {pushMsg("user|group|role name", state);} @after {popMsg(state);} - : KW_USER identifier -> ^(TOK_USER identifier) - | KW_GROUP identifier -> ^(TOK_GROUP identifier) + : KW_USER principalIdentifier -> ^(TOK_USER principalIdentifier) + | KW_GROUP principalIdentifier -> ^(TOK_GROUP principalIdentifier) | KW_ROLE identifier -> ^(TOK_ROLE identifier) ; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 34d2dfc..13d5255 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -536,6 +536,13 @@ functionIdentifier identifier ; +principalIdentifier +@init { gParent.pushMsg("identifier for principal spec", state); } +@after { gParent.popMsg(state); } + : identifier + | QuotedIdentifier + ; + nonReserved : KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | 
KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_LOGICAL | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_ROLES | KW_INNER | KW_DEFINED | KW_ADMIN | KW_JAR | KW_FILE | KW_OWNER | KW_PRINCIPALS | KW_ALL | KW_DEFAULT | KW_NONE | KW_COMPACT | KW_COMPACTIONS | KW_TRANSACTIONS | KW_REWRITE | KW_AUTHORIZATION | KW_VALUES diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 392f7ce..db2ad3f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -47,7 +47,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.common.StatsSetupConst.StatDB; @@ -5866,7 +5865,7 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input) if (!isNonNativeTable) { AcidUtils.Operation acidOp = getAcidType(table_desc.getOutputFileFormatClass()); if (acidOp != AcidUtils.Operation.NOT_ACID) { - checkIfAcidAndOverwriting(qb, table_desc); + checkAcidConstraints(qb, table_desc); } ltd = new LoadTableDesc(queryTmpdir,table_desc, dpCtx, acidOp); ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), @@ -5973,7 +5972,7 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input) dest_part.isStoredAsSubDirectories(), conf); AcidUtils.Operation acidOp = getAcidType(table_desc.getOutputFileFormatClass()); if (acidOp != AcidUtils.Operation.NOT_ACID) { - checkIfAcidAndOverwriting(qb, table_desc); + checkAcidConstraints(qb, table_desc); } ltd = new LoadTableDesc(queryTmpdir, table_desc, dest_part.getSpec(), acidOp); ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), @@ -6233,15 +6232,19 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input) return output; } - // Check if we are overwriting any tables. If so, throw an exception as that is not allowed - // when using an Acid compliant txn manager and operating on an acid table. - private void checkIfAcidAndOverwriting(QB qb, TableDesc tableDesc) throws SemanticException { + // Check constraints on acid tables. 
This includes + // * no insert overwrites + // * no use of vectorization + private void checkAcidConstraints(QB qb, TableDesc tableDesc) throws SemanticException { String tableName = tableDesc.getTableName(); if (!qb.getParseInfo().isInsertIntoTable(tableName)) { LOG.debug("Couldn't find table " + tableName + " in insertIntoTable"); throw new SemanticException(ErrorMsg.NO_INSERT_OVERWRITE_WITH_ACID.getMsg()); } - + if (conf.getBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED)) { + LOG.info("Turning off vectorization for acid write operation"); + conf.setBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED, false); + } } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index e44f5ae..5c5589a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -751,12 +751,10 @@ static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, if (myt.getCategory() == Category.LIST) { // Only allow integer index for now - if (!(children.get(1) instanceof ExprNodeConstantDesc) - || !(((ExprNodeConstantDesc) children.get(1)).getTypeInfo() - .equals(TypeInfoFactory.intTypeInfo))) { + if (!FunctionRegistry.implicitConvertible(children.get(1).getTypeInfo(), + TypeInfoFactory.intTypeInfo)) { throw new SemanticException(SemanticAnalyzer.generateErrorMessage( - expr, - ErrorMsg.INVALID_ARRAYINDEX_CONSTANT.getMsg())); + expr, ErrorMsg.INVALID_ARRAYINDEX_TYPE.getMsg())); } // Calculate TypeInfo @@ -764,14 +762,8 @@ static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry .getGenericUDFForIndex(), children); } else if (myt.getCategory() == Category.MAP) { - // Only allow constant map key for now - if (!(children.get(1) instanceof ExprNodeConstantDesc)) { - throw new SemanticException(SemanticAnalyzer.generateErrorMessage( - expr, - ErrorMsg.INVALID_MAPINDEX_CONSTANT.getMsg())); - } - if (!(((ExprNodeConstantDesc) children.get(1)).getTypeInfo() - .equals(((MapTypeInfo) myt).getMapKeyTypeInfo()))) { + if (!FunctionRegistry.implicitConvertible(children.get(1).getTypeInfo(), + ((MapTypeInfo) myt).getMapKeyTypeInfo())) { throw new SemanticException(ErrorMsg.INVALID_MAPINDEX_TYPE .getMsg(expr)); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java index 3aaa09c..82c8333 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java @@ -28,11 +28,13 @@ import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.session.SessionState; import java.io.IOException; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -127,7 +129,9 @@ private void reparseAndSuperAnalyze(ASTNode tree) throws SemanticException { try { mTable = db.getTable(tableName[0], tableName[1]); } catch (HiveException e) { - throw new SemanticException(ErrorMsg.UPDATEDELETE_PARSE_ERROR.getMsg(), e); + LOG.error("Failed to find table " + getDotName(tableName) + " got exception " + + e.getMessage()); + throw new 
SemanticException(ErrorMsg.INVALID_TABLE, getDotName(tableName)); } List partCols = mTable.getPartCols(); @@ -148,6 +152,8 @@ private void reparseAndSuperAnalyze(ASTNode tree) throws SemanticException { rewrittenQueryStr.append(" select ROW__ID"); Map setColExprs = null; + Map setCols = null; + Set setRCols = new HashSet(); if (updating()) { // An update needs to select all of the columns, as we rewrite the entire row. Also, // we need to figure out which columns we are going to replace. We won't write the set @@ -160,7 +166,7 @@ private void reparseAndSuperAnalyze(ASTNode tree) throws SemanticException { // Get the children of the set clause, each of which should be a column assignment List assignments = setClause.getChildren(); - Map setCols = new HashMap(assignments.size()); + setCols = new HashMap(assignments.size()); setColExprs = new HashMap(assignments.size()); for (Node a : assignments) { ASTNode assignment = (ASTNode)a; @@ -173,6 +179,8 @@ private void reparseAndSuperAnalyze(ASTNode tree) throws SemanticException { assert colName.getToken().getType() == HiveParser.Identifier : "Expected column name"; + addSetRCols((ASTNode) assignment.getChildren().get(1), setRCols); + String columnName = colName.getText(); // Make sure this isn't one of the partitioning columns, that's not supported. @@ -323,6 +331,28 @@ private void reparseAndSuperAnalyze(ASTNode tree) throws SemanticException { WriteEntity.WriteType.UPDATE); } } + + // For updates, we need to set the column access info so that it contains information on + // the columns we are updating. + if (updating()) { + ColumnAccessInfo cai = new ColumnAccessInfo(); + for (String colName : setCols.keySet()) { + cai.add(Table.getCompleteName(mTable.getDbName(), mTable.getTableName()), colName); + } + setUpdateColumnAccessInfo(cai); + + // Add the setRCols to the input list + for (String colName : setRCols) { + columnAccessInfo.add(Table.getCompleteName(mTable.getDbName(), mTable.getTableName()), + colName); + } + } + + // We need to weed ROW__ID out of the input column info, as it doesn't make any sense to + // require the user to have authorization on that column. + if (columnAccessInfo != null) { + columnAccessInfo.stripVirtualColumn(VirtualColumn.ROWID); + } } private String operation() { @@ -342,4 +372,22 @@ private boolean inputIsPartitioned(Set inputs) { } return false; } + + // This method find any columns on the right side of a set statement (thus rcols) and puts them + // in a set so we can add them to the list of input cols to check. + private void addSetRCols(ASTNode node, Set setRCols) { + + // See if this node is a TOK_TABLE_OR_COL. If so, find the value and put it in the list. 
If + // not, recurse on any children + if (node.getToken().getType() == HiveParser.TOK_TABLE_OR_COL) { + ASTNode colName = (ASTNode)node.getChildren().get(0); + assert colName.getToken().getType() == HiveParser.Identifier : + "Expected column name"; + setRCols.add(colName.getText()); + } else if (node.getChildren() != null) { + for (Node n : node.getChildren()) { + addSetRCols((ASTNode)n, setRCols); + } + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandUtil.java index 69783e7..ffe9c0b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandUtil.java @@ -21,6 +21,9 @@ import java.util.Arrays; import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; @@ -31,6 +34,7 @@ import com.google.common.base.Joiner; class CommandUtil { + public static final Log LOG = LogFactory.getLog(CommandUtil.class); /** * Authorize command of given type and arguments @@ -47,14 +51,19 @@ static CommandProcessorResponse authorizeCommand(SessionState ss, HiveOperationT // ss can be null in unit tests return null; } - if (ss.isAuthorizationModeV2()) { + + if (ss.isAuthorizationModeV2() && + HiveConf.getBoolVar(ss.getConf(), HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) { + String errMsg = "Error authorizing command " + command; try { authorizeCommandThrowEx(ss, type, command); // authorized to perform action return null; } catch (HiveAuthzPluginException e) { + LOG.error(errMsg, e); return CommandProcessorResponse.create(e); } catch (HiveAccessControlException e) { + LOG.error(errMsg, e); return CommandProcessorResponse.create(e); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java index 93df9f4..1e1f3da 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java @@ -310,9 +310,12 @@ public static HivePrivObjectActionType getActionType(Entity privObject) { return HivePrivObjectActionType.INSERT; case INSERT_OVERWRITE: return HivePrivObjectActionType.INSERT_OVERWRITE; + case UPDATE: + return HivePrivObjectActionType.UPDATE; + case DELETE: + return HivePrivObjectActionType.DELETE; default: - // Ignore other types for purposes of authorization, we are interested only - // in INSERT vs INSERT_OVERWRITE as of now + // Ignore other types for purposes of authorization break; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java index 093b4fd..01d9cb6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java @@ -81,7 +81,7 @@ private int compare(Collection o1, Collection o2) { GLOBAL, DATABASE, TABLE_OR_VIEW, PARTITION, COLUMN, LOCAL_URI, DFS_URI,
COMMAND_PARAMS, FUNCTION } ; public enum HivePrivObjectActionType { - OTHER, INSERT, INSERT_OVERWRITE + OTHER, INSERT, INSERT_OVERWRITE, UPDATE, DELETE }; private final HivePrivilegeObjectType type; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/DummyHiveAuthorizationValidator.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/DummyHiveAuthorizationValidator.java new file mode 100644 index 0000000..cabc22a --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/DummyHiveAuthorizationValidator.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd; + +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizationValidator; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; + +/** + * A no-op HiveAuthorizationValidator for use from hive cli. 
+ */ +public class DummyHiveAuthorizationValidator implements HiveAuthorizationValidator { + + public static final Log LOG = LogFactory.getLog(DummyHiveAuthorizationValidator.class); + + @Override + public void checkPrivileges(HiveOperationType hiveOpType, List inputHObjs, + List outputHObjs, HiveAuthzContext context) + throws HiveAuthzPluginException, HiveAccessControlException { + // no-op + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java index 3236341..d43eee4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java @@ -118,6 +118,7 @@ public HivePrivilegeObjectType getObjectType() { private static SQLPrivTypeGrant[] ADMIN_PRIV_AR = arr(SQLPrivTypeGrant.ADMIN_PRIV); private static SQLPrivTypeGrant[] INS_NOGRANT_AR = arr(SQLPrivTypeGrant.INSERT_NOGRANT); private static SQLPrivTypeGrant[] DEL_NOGRANT_AR = arr(SQLPrivTypeGrant.DELETE_NOGRANT); + private static SQLPrivTypeGrant[] UPD_NOGRANT_AR = arr(SQLPrivTypeGrant.UPDATE_NOGRANT); private static SQLPrivTypeGrant[] OWNER_INS_SEL_DEL_NOGRANT_AR = arr(SQLPrivTypeGrant.OWNER_PRIV, SQLPrivTypeGrant.INSERT_NOGRANT, @@ -287,8 +288,14 @@ public HivePrivilegeObjectType getObjectType() { op2Priv.put(HiveOperationType.QUERY, arr( new PrivRequirement(SEL_NOGRANT_AR, IOType.INPUT), - new PrivRequirement(INS_NOGRANT_AR, IOType.OUTPUT, null), - new PrivRequirement(DEL_NOGRANT_AR, IOType.OUTPUT, HivePrivObjectActionType.INSERT_OVERWRITE) + new PrivRequirement(INS_NOGRANT_AR, IOType.OUTPUT, HivePrivObjectActionType.INSERT), + new PrivRequirement( + arr(SQLPrivTypeGrant.INSERT_NOGRANT, SQLPrivTypeGrant.DELETE_NOGRANT), + IOType.OUTPUT, + HivePrivObjectActionType.INSERT_OVERWRITE), + new PrivRequirement(DEL_NOGRANT_AR, IOType.OUTPUT, HivePrivObjectActionType.DELETE), + new PrivRequirement(UPD_NOGRANT_AR, IOType.OUTPUT, HivePrivObjectActionType.UPDATE), + new PrivRequirement(INS_NOGRANT_AR, IOType.OUTPUT, HivePrivObjectActionType.OTHER) ) ); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java index 96c4b48..1637162 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java @@ -37,6 +37,7 @@ import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.Database; @@ -53,6 +54,8 @@ import org.apache.hadoop.hive.ql.security.authorization.AuthorizationUtils; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext.CLIENT_TYPE; 
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrincipal; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilege; @@ -455,4 +458,23 @@ public static HivePrincipal getValidatedPrincipal(HivePrincipal hPrincipal) return hivePrincipals; } + /** + * Change the session context based on configuration to aid in testing of sql + * std auth + * + * @param ctx + * @param conf + * @return + */ + static HiveAuthzSessionContext applyTestSettings(HiveAuthzSessionContext ctx, HiveConf conf) { + if (conf.getBoolVar(ConfVars.HIVE_TEST_AUTHORIZATION_SQLSTD_HS2_MODE) + && ctx.getClientType() == CLIENT_TYPE.HIVECLI) { + // create new session ctx object with HS2 as client type + HiveAuthzSessionContext.Builder ctxBuilder = new HiveAuthzSessionContext.Builder(ctx); + ctxBuilder.setClientType(CLIENT_TYPE.HIVESERVER2); + return ctxBuilder.build(); + } + return ctx; + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdConfOnlyAuthorizerFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdConfOnlyAuthorizerFactory.java new file mode 100644 index 0000000..c80f349 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdConfOnlyAuthorizerFactory.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd; + +import org.apache.hadoop.hive.common.classification.InterfaceAudience.Private; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizer; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizerFactory; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizerImpl; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveMetastoreClientFactory; + +/** + * Authorization class that can be used from hive cli, so that configuration + * in cli mode is set appropriately for SQL standards authorization. + * This ensures that new tables and views have proper privileges for the table/view owner. + * + * Uses DummyHiveAuthorizationValidator for no-op authorization checks. Authorization using + * sql standards based authorization mode can't be done securely with hive-cli, as hive-cli + * users have direct access to the file system. 
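A minimal sketch of the client-side wiring this factory is aimed at, assuming the usual HiveConf handles (HIVE_AUTHORIZATION_MANAGER, HIVE_AUTHORIZATION_ENABLED); only the factory class name comes from this patch, the concrete settings are illustrative rather than mandated by it:

    HiveConf conf = new HiveConf();
    // Point the CLI at the conf-only factory so owner privileges on new tables/views are
    // still configured, while privilege checks themselves remain a no-op.
    conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
        SQLStdConfOnlyAuthorizerFactory.class.getName());
    // Enforcement stays disabled on the CLI side; secure enforcement is expected to happen
    // in HiveServer2 or via storage based authorization in the metastore.
    conf.setBoolVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED, false);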
+ */ +@Private +public class SQLStdConfOnlyAuthorizerFactory implements HiveAuthorizerFactory { + @Override + public HiveAuthorizer createHiveAuthorizer(HiveMetastoreClientFactory metastoreClientFactory, + HiveConf conf, HiveAuthenticationProvider authenticator, HiveAuthzSessionContext ctx) throws HiveAuthzPluginException { + + SQLStdHiveAccessControllerWrapper privilegeManager = + new SQLStdHiveAccessControllerWrapper(metastoreClientFactory, conf, authenticator, ctx); + return new HiveAuthorizerImpl(privilegeManager, new DummyHiveAuthorizationValidator()); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAccessController.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAccessController.java index 532bcc5..6708425 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAccessController.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAccessController.java @@ -90,42 +90,11 @@ public SQLStdHiveAccessController(HiveMetastoreClientFactory metastoreClientFact HiveAuthenticationProvider authenticator, HiveAuthzSessionContext ctx) throws HiveAuthzPluginException { this.metastoreClientFactory = metastoreClientFactory; this.authenticator = authenticator; - this.sessionCtx = applyTestSettings(ctx, conf); - - assertHiveCliAuthDisabled(conf); - initUserRoles(); + this.sessionCtx = SQLAuthorizationUtils.applyTestSettings(ctx, conf); LOG.info("Created SQLStdHiveAccessController for session context : " + sessionCtx); } /** - * Change the session context based on configuration to aid in testing of sql std auth - * @param ctx - * @param conf - * @return - */ - private HiveAuthzSessionContext applyTestSettings(HiveAuthzSessionContext ctx, HiveConf conf) { - if(conf.getBoolVar(ConfVars.HIVE_TEST_AUTHORIZATION_SQLSTD_HS2_MODE) && - ctx.getClientType() == CLIENT_TYPE.HIVECLI - ){ - // create new session ctx object with HS2 as client type - HiveAuthzSessionContext.Builder ctxBuilder = new HiveAuthzSessionContext.Builder(ctx); - ctxBuilder.setClientType(CLIENT_TYPE.HIVESERVER2); - return ctxBuilder.build(); - } - return ctx; - } - - private void assertHiveCliAuthDisabled(HiveConf conf) throws HiveAuthzPluginException { - if (sessionCtx.getClientType() == CLIENT_TYPE.HIVECLI - && conf.getBoolVar(ConfVars.HIVE_AUTHORIZATION_ENABLED)) { - throw new HiveAuthzPluginException( - "SQL standards based authorization should not be enabled from hive cli" - + "Instead the use of storage based authorization in hive metastore is reccomended. Set " - + ConfVars.HIVE_AUTHORIZATION_ENABLED.varname + "=false to disable authz within cli"); - } - } - - /** * (Re-)initialize currentRoleNames if necessary. 
* @throws HiveAuthzPluginException */ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java index 4555a71..0e093b0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java @@ -25,12 +25,15 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizationValidator; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext.CLIENT_TYPE; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveMetastoreClientFactory; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrincipal; @@ -44,16 +47,30 @@ private final HiveConf conf; private final HiveAuthenticationProvider authenticator; private final SQLStdHiveAccessControllerWrapper privController; + private final HiveAuthzSessionContext ctx; public static final Log LOG = LogFactory.getLog(SQLStdHiveAuthorizationValidator.class); public SQLStdHiveAuthorizationValidator(HiveMetastoreClientFactory metastoreClientFactory, HiveConf conf, HiveAuthenticationProvider authenticator, - SQLStdHiveAccessControllerWrapper privilegeManager) { + SQLStdHiveAccessControllerWrapper privilegeManager, HiveAuthzSessionContext ctx) + throws HiveAuthzPluginException { this.metastoreClientFactory = metastoreClientFactory; this.conf = conf; this.authenticator = authenticator; this.privController = privilegeManager; + this.ctx = SQLAuthorizationUtils.applyTestSettings(ctx, conf); + assertHiveCliAuthDisabled(conf); + } + + private void assertHiveCliAuthDisabled(HiveConf conf) throws HiveAuthzPluginException { + if (ctx.getClientType() == CLIENT_TYPE.HIVECLI + && conf.getBoolVar(ConfVars.HIVE_AUTHORIZATION_ENABLED)) { + throw new HiveAuthzPluginException( + "SQL standards based authorization should not be enabled from hive cli. " + + "Instead the use of storage based authorization in hive metastore is recommended.
Set " + + ConfVars.HIVE_AUTHORIZATION_ENABLED.varname + "=false to disable authz within cli"); + } } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java index de5dacc..e3d49a6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java @@ -37,7 +37,7 @@ public HiveAuthorizer createHiveAuthorizer(HiveMetastoreClientFactory metastoreC return new HiveAuthorizerImpl( privilegeManager, new SQLStdHiveAuthorizationValidator(metastoreClientFactory, conf, authenticator, - privilegeManager) + privilegeManager, ctx) ); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java index 24fe26a..3a14828 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java @@ -519,16 +519,17 @@ private void createSessionDirs(String userName) throws IOException { */ private Path createRootHDFSDir(HiveConf conf) throws IOException { Path rootHDFSDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR)); - FsPermission expectedHDFSDirPermission = new FsPermission("777"); + FsPermission writableHDFSDirPermission = new FsPermission((short)00733); FileSystem fs = rootHDFSDirPath.getFileSystem(conf); if (!fs.exists(rootHDFSDirPath)) { - Utilities.createDirsWithPermission(conf, rootHDFSDirPath, expectedHDFSDirPermission, true); + Utilities.createDirsWithPermission(conf, rootHDFSDirPath, writableHDFSDirPermission, true); } FsPermission currentHDFSDirPermission = fs.getFileStatus(rootHDFSDirPath).getPermission(); LOG.debug("HDFS root scratch dir: " + rootHDFSDirPath + ", permission: " + currentHDFSDirPermission); - // If the root HDFS scratch dir already exists, make sure the permissions are 777. - if (!expectedHDFSDirPermission.equals(fs.getFileStatus(rootHDFSDirPath).getPermission())) { + // If the root HDFS scratch dir already exists, make sure it is writeable. + if (!((currentHDFSDirPermission.toShort() & writableHDFSDirPermission + .toShort()) == writableHDFSDirPermission.toShort())) { throw new RuntimeException("The root scratch dir: " + rootHDFSDirPath + " on HDFS should be writable. 
Current permissions are: " + currentHDFSDirPermission); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java index 5911f2c..bdb2361 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java @@ -26,9 +26,11 @@ import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.IntWritable; /** * GenericUDFIndex. @@ -36,11 +38,10 @@ */ @Description(name = "index", value = "_FUNC_(a, n) - Returns the n-th element of a ") public class GenericUDFIndex extends GenericUDF { + private transient MapObjectInspector mapOI; - private boolean mapKeyPreferWritable; private transient ListObjectInspector listOI; - private transient PrimitiveObjectInspector indexOI; - private transient ObjectInspector returnOI; + private transient ObjectInspectorConverters.Converter converter; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { @@ -66,21 +67,22 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen } // index has to be a primitive - if (arguments[1] instanceof PrimitiveObjectInspector) { - indexOI = (PrimitiveObjectInspector) arguments[1]; - } else { + if (!(arguments[1] instanceof PrimitiveObjectInspector)) { throw new UDFArgumentTypeException(1, "Primitive Type is expected but " + arguments[1].getTypeName() + "\" is found"); } - + PrimitiveObjectInspector inputOI = (PrimitiveObjectInspector) arguments[1]; + ObjectInspector returnOI; + ObjectInspector indexOI; if (mapOI != null) { + indexOI = ObjectInspectorConverters.getConvertedOI( + inputOI, mapOI.getMapKeyObjectInspector()); returnOI = mapOI.getMapValueObjectInspector(); - ObjectInspector keyOI = mapOI.getMapKeyObjectInspector(); - mapKeyPreferWritable = ((PrimitiveObjectInspector) keyOI) - .preferWritable(); } else { + indexOI = PrimitiveObjectInspectorFactory.writableIntObjectInspector; returnOI = listOI.getListElementObjectInspector(); } + converter = ObjectInspectorConverters.getConverter(inputOI, indexOI); return returnOI; } @@ -88,35 +90,16 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { assert (arguments.length == 2); - Object main = arguments[0].get(); Object index = arguments[1].get(); + Object indexObject = converter.convert(index); + if (indexObject == null) { + return null; + } if (mapOI != null) { - - Object indexObject; - if (mapKeyPreferWritable) { - indexObject = indexOI.getPrimitiveWritableObject(index); - } else { - indexObject = indexOI.getPrimitiveJavaObject(index); - } - return mapOI.getMapValueElement(main, indexObject); - - } else { - - assert (listOI != null); - int intIndex = 0; - try { - intIndex = 
PrimitiveObjectInspectorUtils.getInt(index, indexOI); - } catch (NullPointerException e) { - // If index is null, we should return null. - return null; - } catch (NumberFormatException e) { - // If index is not a number, we should return null. - return null; - } - return listOI.getListElement(main, intIndex); - + return mapOI.getMapValueElement(arguments[0].get(), indexObject); } + return listOI.getListElement(arguments[0].get(), ((IntWritable)indexObject).get()); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/util/ZooKeeperHiveHelper.java b/ql/src/java/org/apache/hadoop/hive/ql/util/ZooKeeperHiveHelper.java new file mode 100644 index 0000000..d9faa45 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/util/ZooKeeperHiveHelper.java @@ -0,0 +1,97 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.util; + +import java.util.List; + +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.data.ACL; + +public class ZooKeeperHiveHelper { + public static final Log LOG = LogFactory.getLog(ZooKeeperHiveHelper.class.getName()); + public static final String ZOOKEEPER_PATH_SEPARATOR = "/"; + /** + * Get the ensemble server addresses from the configuration. The format is: host1:port, + * host2:port.. 
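+ * For example, "node1:5666,node2,node3" with hive.zookeeper.client.port=9999 resolves to "node1:5666,node2:9999,node3:9999".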
+ * + * @param conf + **/ + public static String getQuorumServers(HiveConf conf) { + String[] hosts = conf.getVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_QUORUM).split(","); + String port = conf.getVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CLIENT_PORT); + StringBuilder quorum = new StringBuilder(); + for (int i = 0; i < hosts.length; i++) { + quorum.append(hosts[i].trim()); + if (!hosts[i].contains(":")) { + // if the hostname doesn't contain a port, add the configured port to hostname + quorum.append(":"); + quorum.append(port); + } + + if (i != hosts.length - 1) + quorum.append(","); + } + + return quorum.toString(); + } + + + /** + * Create a path on ZooKeeper, if it does not already exist ("mkdir -p") + * + * @param zooKeeperClient ZooKeeper session + * @param path string with ZOOKEEPER_PATH_SEPARATOR as the separator + * @param acl list of ACL entries + * @param createMode for create mode of each node in the patch + * @return + * @throws KeeperException + * @throws InterruptedException + */ + public static String createPathRecursively(ZooKeeper zooKeeperClient, String path, List acl, + CreateMode createMode) throws KeeperException, InterruptedException { + String[] pathComponents = StringUtils.splitByWholeSeparator(path, ZOOKEEPER_PATH_SEPARATOR); + String currentPath = ""; + for (String pathComponent : pathComponents) { + currentPath += ZOOKEEPER_PATH_SEPARATOR + pathComponent; + try { + String node = zooKeeperClient.create(currentPath, new byte[0], acl, createMode); + LOG.info("Created path: " + node); + } catch (KeeperException.NodeExistsException e) { + // Do nothing here + } + } + return currentPath; + } + + /** + * A no-op watcher class + */ + public static class DummyWatcher implements Watcher { + public void process(org.apache.zookeeper.WatchedEvent event) { + } + } + +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java index d7d2a34..f2e8113 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java @@ -20,7 +20,6 @@ import java.lang.reflect.Method; import java.util.ArrayList; -import java.util.Arrays; import java.util.LinkedList; import java.util.List; @@ -80,7 +79,7 @@ protected void setUp() { } private void implicit(TypeInfo a, TypeInfo b, boolean convertible) { - assertEquals(convertible, FunctionRegistry.implicitConvertable(a,b)); + assertEquals(convertible, FunctionRegistry.implicitConvertible(a, b)); } public void testImplicitConversion() { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java index bbde09c..640a9f9 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java @@ -18,17 +18,24 @@ package org.apache.hadoop.hive.ql.exec; +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintStream; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import junit.framework.TestCase; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.io.IOContext; import 
org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory; import org.apache.hadoop.hive.ql.plan.CollectDesc; @@ -42,6 +49,10 @@ import org.apache.hadoop.hive.ql.plan.ScriptDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.processors.CommandProcessor; +import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -49,8 +60,14 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.TextInputFormat; +import org.junit.Test; /** * TestOperators. @@ -274,7 +291,7 @@ public void testScriptOperator() throws Throwable { cd, sop); op.initialize(new JobConf(TestOperators.class), - new ObjectInspector[] {r[0].oi}); + new ObjectInspector[]{r[0].oi}); // evaluate on row for (int i = 0; i < 5; i++) { @@ -379,4 +396,82 @@ public void testMapOperator() throws Throwable { throw (e); } } + + @Test + public void testFetchOperatorContextQuoting() throws Exception { + JobConf conf = new JobConf(); + ArrayList list = new ArrayList(); + list.add(new Path("hdfs://nn.example.com/fi\tl\\e\t1")); + list.add(new Path("hdfs://nn.example.com/file\t2")); + list.add(new Path("file:/file3")); + FetchOperator.setFetchOperatorContext(conf, list); + String[] parts = + conf.get(FetchOperator.FETCH_OPERATOR_DIRECTORY_LIST).split("\t"); + assertEquals(3, parts.length); + assertEquals("hdfs://nn.example.com/fi\\tl\\\\e\\t1", parts[0]); + assertEquals("hdfs://nn.example.com/file\\t2", parts[1]); + assertEquals("file:/file3", parts[2]); + } + + /** + * A custom input format that checks to make sure that the fetch operator + * sets the required attributes. + */ + public static class CustomInFmt extends TextInputFormat { + + @Override + public InputSplit[] getSplits(JobConf job, int splits) throws IOException { + + // ensure that the table properties were copied + assertEquals("val1", job.get("myprop1")); + assertEquals("val2", job.get("myprop2")); + + // ensure that both of the partitions are in the complete list. 
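+ // hive.complete.dir.list is tab-separated, with one entry per partition directory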
+ String[] dirs = job.get("hive.complete.dir.list").split("\t"); + assertEquals(2, dirs.length); + assertEquals(true, dirs[0].endsWith("/state=CA")); + assertEquals(true, dirs[1].endsWith("/state=OR")); + return super.getSplits(job, splits); + } + } + + @Test + public void testFetchOperatorContext() throws Exception { + HiveConf conf = new HiveConf(); + conf.set("hive.support.concurrency", "false"); + SessionState.start(conf); + String cmd = "create table fetchOp (id int, name string) " + + "partitioned by (state string) " + + "row format delimited fields terminated by '|' " + + "stored as " + + "inputformat 'org.apache.hadoop.hive.ql.exec.TestOperators$CustomInFmt' " + + "outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' " + + "tblproperties ('myprop1'='val1', 'myprop2' = 'val2')"; + Driver driver = new Driver(); + driver.init(); + CommandProcessorResponse response = driver.run(cmd); + assertEquals(0, response.getResponseCode()); + List result = new ArrayList(); + + cmd = "load data local inpath '../data/files/employee.dat' " + + "overwrite into table fetchOp partition (state='CA')"; + driver.init(); + response = driver.run(cmd); + assertEquals(0, response.getResponseCode()); + + cmd = "load data local inpath '../data/files/employee2.dat' " + + "overwrite into table fetchOp partition (state='OR')"; + driver.init(); + response = driver.run(cmd); + assertEquals(0, response.getResponseCode()); + + cmd = "select * from fetchOp"; + driver.init(); + driver.setMaxRows(500); + response = driver.run(cmd); + assertEquals(0, response.getResponseCode()); + driver.getResults(result); + assertEquals(20, result.size()); + driver.close(); + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java index 50447e8..525aa99 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java @@ -50,6 +50,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; @@ -601,6 +602,30 @@ public void testCountStar() throws HiveException { } @Test + public void testCountReduce() throws HiveException { + testAggregateCountReduce( + 2, + Arrays.asList(new Long[]{}), + 0L); + testAggregateCountReduce( + 2, + Arrays.asList(new Long[]{0L}), + 0L); + testAggregateCountReduce( + 2, + Arrays.asList(new Long[]{0L,0L}), + 0L); + testAggregateCountReduce( + 2, + Arrays.asList(new Long[]{0L,1L,0L}), + 1L); + testAggregateCountReduce( + 2, + Arrays.asList(new Long[]{13L,0L,7L,19L}), + 39L); + } + + @Test public void testCountDecimal() throws HiveException { testAggregateDecimal( "Decimal", @@ -1210,7 +1235,7 @@ public void testCountLongEmpty () throws HiveException { "count", 2, Arrays.asList(new Long[]{}), - null); + 0L); } @Test @@ -2027,6 +2052,17 @@ public void testAggregateCountStar ( testAggregateCountStarIterable (fdr, expected); } + public void testAggregateCountReduce ( + int batchSize, + Iterable values, + Object expected) throws HiveException { + + @SuppressWarnings("unchecked") + FakeVectorRowBatchFromLongIterables fdr = new 
FakeVectorRowBatchFromLongIterables(batchSize, + values); + testAggregateCountReduceIterable (fdr, expected); + } + public static interface Validator { void validate (String key, Object expected, Object result); @@ -2223,6 +2259,37 @@ public void testAggregateCountStarIterable ( validator.validate("_total", expected, result); } + public void testAggregateCountReduceIterable ( + Iterable data, + Object expected) throws HiveException { + Map mapColumnNames = new HashMap(); + mapColumnNames.put("A", 0); + VectorizationContext ctx = new VectorizationContext(mapColumnNames, 1); + + GroupByDesc desc = buildGroupByDescType(ctx, "count", "A", TypeInfoFactory.longTypeInfo); + VectorGroupByDesc vectorDesc = desc.getVectorDesc(); + vectorDesc.setIsReduce(true); + + VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc); + + FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(vgo); + vgo.initialize(null, null); + + for (VectorizedRowBatch unit: data) { + vgo.processOp(unit, 0); + } + vgo.close(false); + + List outBatchList = out.getCapturedRows(); + assertNotNull(outBatchList); + assertEquals(1, outBatchList.size()); + + Object result = outBatchList.get(0); + + Validator validator = getValidator("count"); + validator.validate("_total", expected, result); + } + public void testAggregateStringIterable ( String aggregateName, Iterable data, diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index a3ac1f7..5fedb62 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -118,7 +118,6 @@ TimeZone gmt = TimeZone.getTimeZone("GMT+0"); DATE_FORMAT.setTimeZone(gmt); TIME_FORMAT.setTimeZone(gmt); - TimeZone local = TimeZone.getDefault(); } public static class BigRow implements Writable { @@ -560,6 +559,12 @@ public MockOutputStream(MockFile file) throws IOException { this.file = file; } + /** + * Set the blocks and their location for the file. + * Must be called after the stream is closed or the block length will be + * wrong. + * @param blocks the list of blocks + */ public void setBlocks(MockBlock... 
blocks) { file.blocks = blocks; int offset = 0; @@ -580,12 +585,18 @@ public void close() throws IOException { file.content = new byte[file.length]; System.arraycopy(buf.getData(), 0, file.content, 0, file.length); } + + @Override + public String toString() { + return "Out stream to " + file.toString(); + } } public static class MockFileSystem extends FileSystem { final List files = new ArrayList(); Path workingDir = new Path("/"); + @SuppressWarnings("unused") public MockFileSystem() { // empty } @@ -620,7 +631,7 @@ public FSDataInputStream open(Path path, int i) throws IOException { return new FSDataInputStream(new MockInputStream(file)); } } - return null; + throw new IOException("File not found: " + path); } @Override @@ -743,8 +754,12 @@ public FileStatus getFileStatus(Path path) throws IOException { for(MockBlock block: file.blocks) { if (OrcInputFormat.SplitGenerator.getOverlap(block.offset, block.length, start, len) > 0) { + String[] topology = new String[block.hosts.length]; + for(int i=0; i < topology.length; ++i) { + topology[i] = "/rack/ " + block.hosts[i]; + } result.add(new BlockLocation(block.hosts, block.hosts, - block.offset, block.length)); + topology, block.offset, block.length)); } } return result.toArray(new BlockLocation[result.size()]); @@ -1209,7 +1224,8 @@ JobConf createMockExecutionEnvironment(Path workDir, Path warehouseDir, String tableName, ObjectInspector objectInspector, - boolean isVectorized + boolean isVectorized, + int partitions ) throws IOException { Utilities.clearWorkMap(); JobConf conf = new JobConf(); @@ -1218,9 +1234,20 @@ JobConf createMockExecutionEnvironment(Path workDir, conf.set("hive.vectorized.execution.enabled", Boolean.toString(isVectorized)); conf.set("fs.mock.impl", MockFileSystem.class.getName()); conf.set("mapred.mapper.class", ExecMapper.class.getName()); - Path root = new Path(warehouseDir, tableName + "/p=0"); + Path root = new Path(warehouseDir, tableName); + // clean out previous contents ((MockFileSystem) root.getFileSystem(conf)).clear(); - conf.set("mapred.input.dir", root.toString()); + // build partition strings + String[] partPath = new String[partitions]; + StringBuilder buffer = new StringBuilder(); + for(int p=0; p < partitions; ++p) { + partPath[p] = new Path(root, "p=" + p).toString(); + if (p != 0) { + buffer.append(','); + } + buffer.append(partPath[p]); + } + conf.set("mapred.input.dir", buffer.toString()); StringBuilder columnIds = new StringBuilder(); StringBuilder columnNames = new StringBuilder(); StringBuilder columnTypes = new StringBuilder(); @@ -1249,9 +1276,6 @@ JobConf createMockExecutionEnvironment(Path workDir, tblProps.put("columns.types", columnTypes.toString()); TableDesc tbl = new TableDesc(OrcInputFormat.class, OrcOutputFormat.class, tblProps); - LinkedHashMap partSpec = - new LinkedHashMap(); - PartitionDesc part = new PartitionDesc(tbl, partSpec); MapWork mapWork = new MapWork(); mapWork.setVectorMode(isVectorized); @@ -1260,11 +1284,16 @@ JobConf createMockExecutionEnvironment(Path workDir, new LinkedHashMap>(); ArrayList aliases = new ArrayList(); aliases.add(tableName); - aliasMap.put(root.toString(), aliases); - mapWork.setPathToAliases(aliasMap); LinkedHashMap partMap = new LinkedHashMap(); - partMap.put(root.toString(), part); + for(int p=0; p < partitions; ++p) { + aliasMap.put(partPath[p], aliases); + LinkedHashMap partSpec = + new LinkedHashMap(); + PartitionDesc part = new PartitionDesc(tbl, partSpec); + partMap.put(partPath[p], part); + } + mapWork.setPathToAliases(aliasMap); 
mapWork.setPathToPartitionInfo(partMap); mapWork.setScratchColumnMap(new HashMap>()); mapWork.setScratchColumnVectorTypes(new HashMap inputFormat = new CombineHiveInputFormat(); - try { - InputSplit[] splits = inputFormat.getSplits(conf, 1); - assertTrue("shouldn't reach here", false); - } catch (IOException ioe) { - assertEquals("CombineHiveInputFormat is incompatible" - + " with ACID tables. Please set hive.input.format=org.apache.hadoop" - + ".hive.ql.io.HiveInputFormat", - ioe.getMessage()); - } + InputSplit[] splits = inputFormat.getSplits(conf, 1); + assertEquals(3, splits.length); + HiveInputFormat.HiveInputSplit split = + (HiveInputFormat.HiveInputSplit) splits[0]; + assertEquals("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", + split.inputFormatClassName()); + assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00000", + split.getPath().toString()); + assertEquals(0, split.getStart()); + assertEquals(580, split.getLength()); + split = (HiveInputFormat.HiveInputSplit) splits[1]; + assertEquals("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", + split.inputFormatClassName()); + assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00001", + split.getPath().toString()); + assertEquals(0, split.getStart()); + assertEquals(601, split.getLength()); + CombineHiveInputFormat.CombineHiveInputSplit combineSplit = + (CombineHiveInputFormat.CombineHiveInputSplit) splits[2]; + assertEquals(BUCKETS, combineSplit.getNumPaths()); + for(int bucket=0; bucket < BUCKETS; ++bucket) { + assertEquals("mock:/combinationAcid/p=1/00000" + bucket + "_0", + combineSplit.getPath(bucket).toString()); + assertEquals(0, combineSplit.getOffset(bucket)); + assertEquals(227, combineSplit.getLength(bucket)); + } + String[] hosts = combineSplit.getLocations(); + assertEquals(2, hosts.length); } @Test diff --git a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/zookeeper/TestZookeeperLockManager.java b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/zookeeper/TestZookeeperLockManager.java index 59294b1..aacb73f 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/zookeeper/TestZookeeperLockManager.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/zookeeper/TestZookeeperLockManager.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.lockmgr.HiveLockMode; import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject; +import org.apache.hadoop.hive.ql.util.ZooKeeperHiveHelper; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.ZooKeeper; import org.junit.Assert; @@ -87,14 +88,14 @@ public void testDeleteWithChildren() throws Exception { public void testGetQuorumServers() { conf.setVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_QUORUM, "node1"); conf.setVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CLIENT_PORT, "9999"); - Assert.assertEquals("node1:9999", ZooKeeperHiveLockManager.getQuorumServers(conf)); + Assert.assertEquals("node1:9999", ZooKeeperHiveHelper.getQuorumServers(conf)); conf.setVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_QUORUM, "node1,node2,node3"); conf.setVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CLIENT_PORT, "9999"); - Assert.assertEquals("node1:9999,node2:9999,node3:9999", ZooKeeperHiveLockManager.getQuorumServers(conf)); + Assert.assertEquals("node1:9999,node2:9999,node3:9999", ZooKeeperHiveHelper.getQuorumServers(conf)); conf.setVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_QUORUM, "node1:5666,node2,node3"); conf.setVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CLIENT_PORT, "9999"); - Assert.assertEquals("node1:5666,node2:9999,node3:9999", 
ZooKeeperHiveLockManager.getQuorumServers(conf)); + Assert.assertEquals("node1:5666,node2:9999,node3:9999", ZooKeeperHiveHelper.getQuorumServers(conf)); } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/TestSQLStdHiveAccessControllerCLI.java b/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/TestSQLStdHiveAccessControllerCLI.java index c0d4948..5c4d713 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/TestSQLStdHiveAccessControllerCLI.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/TestSQLStdHiveAccessControllerCLI.java @@ -25,6 +25,8 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.security.HadoopDefaultAuthenticator; import org.apache.hadoop.hive.ql.security.authorization.plugin.DisallowTransformHook; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizer; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizerFactory; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext.Builder; @@ -77,8 +79,9 @@ public void testAuthEnableError() { HiveConf processedConf = new HiveConf(); processedConf.setBoolVar(ConfVars.HIVE_AUTHORIZATION_ENABLED, true); try { - SQLStdHiveAccessController accessController = new SQLStdHiveAccessController(null, - processedConf, new HadoopDefaultAuthenticator(), getCLISessionCtx()); + HiveAuthorizerFactory authorizerFactory = new SQLStdHiveAuthorizerFactory(); + HiveAuthorizer authorizer = authorizerFactory.createHiveAuthorizer(null, processedConf, + new HadoopDefaultAuthenticator(), getCLISessionCtx()); fail("Exception expected"); } catch (HiveAuthzPluginException e) { assertTrue(e.getMessage().contains( diff --git a/ql/src/test/queries/clientnegative/authorization_delete_nodeletepriv.q b/ql/src/test/queries/clientnegative/authorization_delete_nodeletepriv.q new file mode 100644 index 0000000..090495a --- /dev/null +++ b/ql/src/test/queries/clientnegative/authorization_delete_nodeletepriv.q @@ -0,0 +1,17 @@ +set hive.test.authz.sstd.hs2.mode=true; +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; +set hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.SessionStateConfigUserAuthenticator; +set hive.security.authorization.enabled=true; + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +set hive.enforce.bucketing=true; + + +-- check update without update priv +create table auth_nodel(i int) clustered by (i) into 2 buckets stored as orc;; + +set user.name=user1; +delete from auth_nodel where i > 0; + diff --git a/ql/src/test/queries/clientnegative/authorization_update_noupdatepriv.q b/ql/src/test/queries/clientnegative/authorization_update_noupdatepriv.q new file mode 100644 index 0000000..922beba --- /dev/null +++ b/ql/src/test/queries/clientnegative/authorization_update_noupdatepriv.q @@ -0,0 +1,17 @@ +set hive.test.authz.sstd.hs2.mode=true; +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; +set 
hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.SessionStateConfigUserAuthenticator; +set hive.security.authorization.enabled=true; + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +set hive.enforce.bucketing=true; + + +-- check update without update priv +create table auth_noupd(i int) clustered by (i) into 2 buckets stored as orc;; + +set user.name=user1; +update auth_noupd set i = 0 where i > 0; + diff --git a/ql/src/test/queries/clientnegative/update_no_such_table.q b/ql/src/test/queries/clientnegative/update_no_such_table.q new file mode 100644 index 0000000..522c46d --- /dev/null +++ b/ql/src/test/queries/clientnegative/update_no_such_table.q @@ -0,0 +1,7 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +set hive.enforce.bucketing=true; + + +update no_such_table set b = 'fred'; diff --git a/ql/src/test/queries/clientpositive/acid_vectorization.q b/ql/src/test/queries/clientpositive/acid_vectorization.q new file mode 100644 index 0000000..9d91d88 --- /dev/null +++ b/ql/src/test/queries/clientpositive/acid_vectorization.q @@ -0,0 +1,16 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +set hive.enforce.bucketing=true; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.vectorized.execution.enabled=true; +set hive.mapred.supports.subdirectories=true; + +CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC; +insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10; +set hive.vectorized.execution.enabled=true; +insert into table acid_vectorized values (1, 'bar'); +set hive.vectorized.execution.enabled=true; +update acid_vectorized set b = 'foo' where b = 'bar'; +set hive.vectorized.execution.enabled=true; +delete from acid_vectorized where b = 'foo'; diff --git a/ql/src/test/queries/clientpositive/array_map_access_nonconstant.q b/ql/src/test/queries/clientpositive/array_map_access_nonconstant.q new file mode 100644 index 0000000..49c1f54 --- /dev/null +++ b/ql/src/test/queries/clientpositive/array_map_access_nonconstant.q @@ -0,0 +1,15 @@ +set hive.fetch.task.conversion=more; + +create table array_table (array array, index int ); +insert into table array_table select array('first', 'second', 'third'), key%3 from src tablesample (4 rows); + +explain +select index, array[index] from array_table; +select index, array[index] from array_table; + +create table map_table (data map, key int ); +insert into table map_table select map('1','one','2','two','3','three'), cast((key%3+1) as int) from src tablesample (4 rows); + +explain +select key, data[key] from map_table; +select key, data[key] from map_table; diff --git a/ql/src/test/queries/clientpositive/authorization_cli_createtab.q b/ql/src/test/queries/clientpositive/authorization_cli_createtab.q index 25fc80a..ffaf0ad 100644 --- a/ql/src/test/queries/clientpositive/authorization_cli_createtab.q +++ b/ql/src/test/queries/clientpositive/authorization_cli_createtab.q @@ -1,6 +1,5 @@ -set hive.test.authz.sstd.hs2.mode=true; set hive.users.in.admin.role=hive_admin_user; -set 
hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory; set hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.SessionStateConfigUserAuthenticator; set user.name=hive_test_user; diff --git a/ql/src/test/queries/clientpositive/authorization_cli_createtab_noauthzapi.q b/ql/src/test/queries/clientpositive/authorization_cli_createtab_noauthzapi.q new file mode 100644 index 0000000..c39df65 --- /dev/null +++ b/ql/src/test/queries/clientpositive/authorization_cli_createtab_noauthzapi.q @@ -0,0 +1,12 @@ +set hive.users.in.admin.role=hive_admin_user; +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory; +set hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.SessionStateConfigUserAuthenticator; +set hive.metastore.pre.event.listeners=org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener; +set hive.security.metastore.authorization.manager=org.apache.hadoop.hive.ql.security.MetastoreAuthzAPIDisallowAuthorizer; +set user.name=hive_test_user; + +-- verify that sql std auth can be set as the authorizer with hive cli, while metastore authorization api calls are disabled (for cli) + +create table t_cli(i int); + +create view v_cli (i) as select i from t_cli; diff --git a/ql/src/test/queries/clientpositive/authorization_cli_nonsql.q b/ql/src/test/queries/clientpositive/authorization_cli_nonsql.q new file mode 100644 index 0000000..58d3bf5 --- /dev/null +++ b/ql/src/test/queries/clientpositive/authorization_cli_nonsql.q @@ -0,0 +1,29 @@ +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory; +set hive.security.authorization.enabled=false; + +-- Verify that dfs,compile,add,delete commands can be run from hive cli, and no authorization checks happen when auth is diabled + +use default; + +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/a_admin_almighty1; +dfs -ls ${system:test.tmp.dir}/a_admin_almighty1; + +create table a_table1(a int, b int); +add jar ${system:maven.local.repository}/org/apache/hive/hive-it-test-serde/${system:hive.version}/hive-it-test-serde-${system:hive.version}.jar; +alter table a_table1 set serde 'org.apache.hadoop.hive.serde2.TestSerDe' with serdeproperties('s1'='9'); +drop table a_table; + +delete jar ${system:maven.local.repository}/org/apache/hive/hive-it-test-serde/${system:hive.version}/hive-it-test-serde-${system:hive.version}.jar; + +compile `import org.apache.hadoop.hive.ql.exec.UDF \; +public class Pyth extends UDF { + public double evaluate(double a, double b){ + return Math.sqrt((a*a) + (b*b)) \; + } +} `AS GROOVY NAMED Pyth.groovy; +CREATE TEMPORARY FUNCTION Pyth as 'Pyth'; + +SELECT Pyth(3,4) FROM src tablesample (1 rows); + +DROP TEMPORARY FUNCTION Pyth; + diff --git a/ql/src/test/queries/clientpositive/authorization_cli_stdconfigauth.q b/ql/src/test/queries/clientpositive/authorization_cli_stdconfigauth.q new file mode 100644 index 0000000..a3f8e64 --- /dev/null +++ b/ql/src/test/queries/clientpositive/authorization_cli_stdconfigauth.q @@ -0,0 +1,10 @@ +set hive.users.in.admin.role=hive_admin_user; +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdConfOnlyAuthorizerFactory; +set 
hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.SessionStateConfigUserAuthenticator; +set hive.security.authorization.enabled=true; + +-- verify that SQLStdConfOnlyAuthorizerFactory as the authorizer factory with hive cli, with hive.security.authorization.enabled=true +-- authorization verification would be just no-op + +create table t_cli(i int); +describe t_cli; diff --git a/ql/src/test/queries/clientpositive/authorization_delete.q b/ql/src/test/queries/clientpositive/authorization_delete.q new file mode 100644 index 0000000..ebd0315 --- /dev/null +++ b/ql/src/test/queries/clientpositive/authorization_delete.q @@ -0,0 +1,25 @@ +set hive.test.authz.sstd.hs2.mode=true; +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; +set hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.SessionStateConfigUserAuthenticator; + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +set hive.enforce.bucketing=true; + +set user.name=user1; +-- current user has been set (comment line before the set cmd is resulting in parse error!!) + +CREATE TABLE t_auth_del(i int) clustered by (i) into 2 buckets stored as orc; + +-- grant update privilege to another user +GRANT DELETE ON t_auth_del TO USER userWIns; +GRANT SELECT ON t_auth_del TO USER userWIns; + +set user.name=hive_admin_user; +set role admin; +SHOW GRANT ON TABLE t_auth_del; + + +set user.name=userWIns; +delete from t_auth_del where i > 0; diff --git a/ql/src/test/queries/clientpositive/authorization_delete_own_table.q b/ql/src/test/queries/clientpositive/authorization_delete_own_table.q new file mode 100644 index 0000000..19dbbeb --- /dev/null +++ b/ql/src/test/queries/clientpositive/authorization_delete_own_table.q @@ -0,0 +1,17 @@ +set hive.test.authz.sstd.hs2.mode=true; +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; +set hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.SessionStateConfigUserAuthenticator; +set hive.security.authorization.enabled=true; + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +set hive.enforce.bucketing=true; + + +set user.name=user1; +create table auth_noupd(i int) clustered by (i) into 2 buckets stored as orc;; +delete from auth_noupd where i > 0; + +set user.name=hive_admin_user; +set role admin; diff --git a/ql/src/test/queries/clientpositive/authorization_non_id.q b/ql/src/test/queries/clientpositive/authorization_non_id.q new file mode 100644 index 0000000..26d4b50 --- /dev/null +++ b/ql/src/test/queries/clientpositive/authorization_non_id.q @@ -0,0 +1,25 @@ +set hive.test.authz.sstd.hs2.mode=true; +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; +set hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.SessionStateConfigUserAuthenticator; +set user.name=hive_admin_user; + +set role ADMIN; +drop table if exists src_autho_test; +create table src_autho_test (id int); + +create role src_role2; + +grant role src_role2 to user bar; +grant role src_role2 to user `foo-1`; + +show role grant user bar; +show role grant user `foo-1`; + 
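+-- also verify that table privileges can be granted to a user name that is not a valid identifier (quoted with backticks)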
+grant select on table src_autho_test to user bar; +grant select on table src_autho_test to user `foo-1`; + +show grant user bar on all; +show grant user `foo-1` on all; + +drop table src_autho_test; +drop role src_role2; diff --git a/ql/src/test/queries/clientpositive/authorization_update.q b/ql/src/test/queries/clientpositive/authorization_update.q new file mode 100644 index 0000000..18ceadb --- /dev/null +++ b/ql/src/test/queries/clientpositive/authorization_update.q @@ -0,0 +1,28 @@ +set hive.test.authz.sstd.hs2.mode=true; +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; +set hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.SessionStateConfigUserAuthenticator; + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +set hive.enforce.bucketing=true; + +set user.name=user1; +-- current user has been set (comment line before the set cmd is resulting in parse error!!) + +CREATE TABLE t_auth_up(i int) clustered by (i) into 2 buckets stored as orc; + +CREATE TABLE t_select(i int); +GRANT ALL ON TABLE t_select TO ROLE public; + +-- grant update privilege to another user +GRANT UPDATE ON t_auth_up TO USER userWIns; +GRANT SELECT ON t_auth_up TO USER userWIns; + +set user.name=hive_admin_user; +set role admin; +SHOW GRANT ON TABLE t_auth_up; + + +set user.name=userWIns; +update t_auth_up set i = 0 where i > 0; diff --git a/ql/src/test/queries/clientpositive/authorization_update_own_table.q b/ql/src/test/queries/clientpositive/authorization_update_own_table.q new file mode 100644 index 0000000..46beb49 --- /dev/null +++ b/ql/src/test/queries/clientpositive/authorization_update_own_table.q @@ -0,0 +1,17 @@ +set hive.test.authz.sstd.hs2.mode=true; +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; +set hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.SessionStateConfigUserAuthenticator; +set hive.security.authorization.enabled=true; + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +set hive.enforce.bucketing=true; + + +set user.name=user1; +create table auth_noupd(i int) clustered by (i) into 2 buckets stored as orc;; +update auth_noupd set i = 0 where i > 0; + +set user.name=hive_admin_user; +set role admin; diff --git a/ql/src/test/queries/clientpositive/drop_index.q b/ql/src/test/queries/clientpositive/drop_index.q index df23f8d..54ef823 100644 --- a/ql/src/test/queries/clientpositive/drop_index.q +++ b/ql/src/test/queries/clientpositive/drop_index.q @@ -1,2 +1,3 @@ SET hive.exec.drop.ignorenonexistent=false; DROP INDEX IF EXISTS UnknownIndex ON src; +DROP INDEX IF EXISTS UnknownIndex ON UnknownTable; diff --git a/ql/src/test/queries/clientpositive/escape3.q b/ql/src/test/queries/clientpositive/escape3.q new file mode 100644 index 0000000..192ee84 --- /dev/null +++ b/ql/src/test/queries/clientpositive/escape3.q @@ -0,0 +1,48 @@ +-- with string +CREATE TABLE escape3_1 +( +GERUND STRING, +ABBREV STRING, +CODE SMALLINT +) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' ESCAPED BY '\134' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/data_with_escape.txt' INTO TABLE escape3_1; + +select * from escape3_1; + +-- with 
varchar +CREATE TABLE escape3_2 +( +GERUND VARCHAR(10), +ABBREV VARCHAR(3), +CODE SMALLINT +) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' ESCAPED BY '\134' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/data_with_escape.txt' INTO TABLE escape3_2; + +select * from escape3_2; + +-- with char +CREATE TABLE escape3_3 +( +GERUND CHAR(10), +ABBREV CHAR(3), +CODE SMALLINT +) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' ESCAPED BY '\134' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/data_with_escape.txt' INTO TABLE escape3_3; + +select * from escape3_3; + +DROP TABLE escape3_1; +DROP TABLE escape3_2; +DROP TABLE escape3_3; diff --git a/ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q b/ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q index 09f4bff..e016ef8 100644 --- a/ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q +++ b/ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q @@ -46,6 +46,8 @@ explain select count(*), count(1), sum(1), sum(2), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt > 2010; select count(*), count(1), sum(1), sum(2), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt > 2010; +select count(*) from stats_tbl_part; +select count(*)/2 from stats_tbl_part; drop table stats_tbl_part; set hive.compute.query.using.stats=false; set hive.stats.dbclass=jdbc:derby; diff --git a/ql/src/test/queries/clientpositive/serde_opencsv.q b/ql/src/test/queries/clientpositive/serde_opencsv.q new file mode 100644 index 0000000..a5ef8da --- /dev/null +++ b/ql/src/test/queries/clientpositive/serde_opencsv.q @@ -0,0 +1,36 @@ +EXPLAIN +CREATE TABLE serde_opencsv( + words STRING, + int1 INT, + tinyint1 TINYINT, + smallint1 SMALLINT, + bigint1 BIGINT, + boolean1 BOOLEAN, + float1 FLOAT, + double1 DOUBLE) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' +WITH SERDEPROPERTIES( + "separatorChar" = ",", + "quoteChar" = "'", + "escapeChar" = "\\" +) stored as textfile; + +CREATE TABLE serde_opencsv( + words STRING, + int1 INT, + tinyint1 TINYINT, + smallint1 SMALLINT, + bigint1 BIGINT, + boolean1 BOOLEAN, + float1 FLOAT, + double1 DOUBLE) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' +WITH SERDEPROPERTIES( + "separatorChar" = ",", + "quoteChar" = "'", + "escapeChar" = "\\" +) stored as textfile; + +LOAD DATA LOCAL INPATH "../../data/files/opencsv-data.txt" INTO TABLE serde_opencsv; + +SELECT count(*) FROM serde_opencsv; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/vectorization_short_regress.q b/ql/src/test/queries/clientpositive/vectorization_short_regress.q index 638a31f..d1194cc 100644 --- a/ql/src/test/queries/clientpositive/vectorization_short_regress.q +++ b/ql/src/test/queries/clientpositive/vectorization_short_regress.q @@ -850,3 +850,52 @@ WHERE (((cboolean1 IS NOT NULL)) GROUP BY cboolean1 ORDER BY cboolean1; +-- These tests verify COUNT on empty or null colulmns work correctly. 
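+-- test_count is left empty; alltypesnullorc is populated with rows whose columns are all null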
+create table test_count(i int) stored as orc; + +explain +select count(*) from test_count; + +select count(*) from test_count; + +explain +select count(i) from test_count; + +select count(i) from test_count; + +create table alltypesnull like alltypesorc; +alter table alltypesnull set fileformat textfile; + +insert into table alltypesnull select null, null, null, null, null, null, null, null, null, null, null, null from alltypesorc; + +create table alltypesnullorc stored as orc as select * from alltypesnull; + +explain +select count(*) from alltypesnullorc; + +select count(*) from alltypesnullorc; + +explain +select count(ctinyint) from alltypesnullorc; + +select count(ctinyint) from alltypesnullorc; + +explain +select count(cint) from alltypesnullorc; + +select count(cint) from alltypesnullorc; + +explain +select count(cfloat) from alltypesnullorc; + +select count(cfloat) from alltypesnullorc; + +explain +select count(cstring1) from alltypesnullorc; + +select count(cstring1) from alltypesnullorc; + +explain +select count(cboolean1) from alltypesnullorc; + +select count(cboolean1) from alltypesnullorc; diff --git a/ql/src/test/queries/negative/invalid_list_index.q b/ql/src/test/queries/negative/invalid_list_index.q deleted file mode 100644 index c40f079..0000000 --- a/ql/src/test/queries/negative/invalid_list_index.q +++ /dev/null @@ -1,2 +0,0 @@ -FROM src_thrift -INSERT OVERWRITE TABLE dest1 SELECT src_thrift.lint[0], src_thrift.lstring['abc'] diff --git a/ql/src/test/queries/negative/invalid_list_index2.q b/ql/src/test/queries/negative/invalid_list_index2.q deleted file mode 100644 index 99d0b3d..0000000 --- a/ql/src/test/queries/negative/invalid_list_index2.q +++ /dev/null @@ -1,2 +0,0 @@ -FROM src_thrift -INSERT OVERWRITE TABLE dest1 SELECT src_thrift.lint[0], src_thrift.lstring[1 + 2] diff --git a/ql/src/test/queries/negative/invalid_map_index.q b/ql/src/test/queries/negative/invalid_map_index.q deleted file mode 100644 index c2b9eab..0000000 --- a/ql/src/test/queries/negative/invalid_map_index.q +++ /dev/null @@ -1,2 +0,0 @@ -FROM src_thrift -INSERT OVERWRITE TABLE dest1 SELECT src_thrift.lint[0], src_thrift.mstringstring[0] diff --git a/ql/src/test/queries/negative/invalid_map_index2.q b/ql/src/test/queries/negative/invalid_map_index2.q deleted file mode 100644 index 5828f07..0000000 --- a/ql/src/test/queries/negative/invalid_map_index2.q +++ /dev/null @@ -1,2 +0,0 @@ -FROM src_thrift -INSERT OVERWRITE TABLE dest1 SELECT src_thrift.lint[0], src_thrift.mstringstring[concat('abc', 'abc')] diff --git a/ql/src/test/results/clientnegative/authorization_delete_nodeletepriv.q.out b/ql/src/test/results/clientnegative/authorization_delete_nodeletepriv.q.out new file mode 100644 index 0000000..00fb3f7 --- /dev/null +++ b/ql/src/test/results/clientnegative/authorization_delete_nodeletepriv.q.out @@ -0,0 +1,11 @@ +PREHOOK: query: -- check update without update priv +create table auth_nodel(i int) clustered by (i) into 2 buckets stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@auth_nodel +POSTHOOK: query: -- check update without update priv +create table auth_nodel(i int) clustered by (i) into 2 buckets stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@auth_nodel +FAILED: HiveAccessControlException Permission denied: Principal [name=user1, type=USER] does not have following privileges for operation QUERY [[DELETE] on Object [type=TABLE_OR_VIEW, name=default.auth_nodel], [SELECT] on 
Object [type=TABLE_OR_VIEW, name=default.auth_nodel]] diff --git a/ql/src/test/results/clientnegative/authorization_update_noupdatepriv.q.out b/ql/src/test/results/clientnegative/authorization_update_noupdatepriv.q.out new file mode 100644 index 0000000..e4b3c5e --- /dev/null +++ b/ql/src/test/results/clientnegative/authorization_update_noupdatepriv.q.out @@ -0,0 +1,11 @@ +PREHOOK: query: -- check update without update priv +create table auth_noupd(i int) clustered by (i) into 2 buckets stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@auth_noupd +POSTHOOK: query: -- check update without update priv +create table auth_noupd(i int) clustered by (i) into 2 buckets stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@auth_noupd +FAILED: HiveAccessControlException Permission denied: Principal [name=user1, type=USER] does not have following privileges for operation QUERY [[SELECT] on Object [type=TABLE_OR_VIEW, name=default.auth_noupd], [UPDATE] on Object [type=TABLE_OR_VIEW, name=default.auth_noupd]] diff --git a/ql/src/test/results/clientnegative/update_no_such_table.q.out b/ql/src/test/results/clientnegative/update_no_such_table.q.out new file mode 100644 index 0000000..e4f19e4 --- /dev/null +++ b/ql/src/test/results/clientnegative/update_no_such_table.q.out @@ -0,0 +1 @@ +FAILED: SemanticException [Error 10001]: Table not found default.no_such_table diff --git a/ql/src/test/results/clientpositive/acid_vectorization.q.out b/ql/src/test/results/clientpositive/acid_vectorization.q.out new file mode 100644 index 0000000..4a9d19f --- /dev/null +++ b/ql/src/test/results/clientpositive/acid_vectorization.q.out @@ -0,0 +1,44 @@ +PREHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acid_vectorized +POSTHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acid_vectorized +PREHOOK: query: insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@acid_vectorized +POSTHOOK: query: insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@acid_vectorized +POSTHOOK: Lineage: acid_vectorized.a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: acid_vectorized.b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +PREHOOK: query: insert into table acid_vectorized values (1, 'bar') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@acid_vectorized +POSTHOOK: query: insert into table acid_vectorized values (1, 'bar') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@acid_vectorized +POSTHOOK: Lineage: acid_vectorized.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: acid_vectorized.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ] +PREHOOK: query: update acid_vectorized set b = 'foo' where b = 'bar' +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_vectorized +PREHOOK: Output: default@acid_vectorized +POSTHOOK: query: update acid_vectorized set b = 'foo' where b = 'bar' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_vectorized +POSTHOOK: Output: default@acid_vectorized +PREHOOK: query: delete from acid_vectorized where b = 'foo' +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_vectorized +PREHOOK: Output: default@acid_vectorized +POSTHOOK: query: delete from acid_vectorized where b = 'foo' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_vectorized +POSTHOOK: Output: default@acid_vectorized diff --git a/ql/src/test/results/clientpositive/array_map_access_nonconstant.q.out b/ql/src/test/results/clientpositive/array_map_access_nonconstant.q.out new file mode 100644 index 0000000..d8b88c6 --- /dev/null +++ b/ql/src/test/results/clientpositive/array_map_access_nonconstant.q.out @@ -0,0 +1,106 @@ +PREHOOK: query: create table array_table (array array, index int ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@array_table +POSTHOOK: query: create table array_table (array array, index int ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@array_table +PREHOOK: query: insert into table array_table select array('first', 'second', 'third'), key%3 from src tablesample (4 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@array_table +POSTHOOK: query: insert into table array_table select array('first', 'second', 'third'), key%3 from src tablesample (4 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@array_table +POSTHOOK: Lineage: array_table.array EXPRESSION [] +POSTHOOK: Lineage: array_table.index EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: explain +select index, array[index] from array_table +PREHOOK: type: QUERY +POSTHOOK: query: explain +select index, array[index] from array_table +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: array_table + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: index (type: int), array[index] (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select index, array[index] from array_table +PREHOOK: type: QUERY +PREHOOK: Input: default@array_table +#### A masked pattern was here #### +POSTHOOK: query: select index, array[index] from array_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@array_table +#### A masked pattern was here #### +1 second +2 third +2 third +0 first +PREHOOK: query: create table map_table (data map, key int ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@map_table +POSTHOOK: query: create table map_table (data map, key int ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@map_table +PREHOOK: query: insert into table map_table select map('1','one','2','two','3','three'), cast((key%3+1) as int) from src tablesample (4 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@map_table +POSTHOOK: query: insert into table map_table 
select map('1','one','2','two','3','three'), cast((key%3+1) as int) from src tablesample (4 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@map_table +POSTHOOK: Lineage: map_table.data EXPRESSION [] +POSTHOOK: Lineage: map_table.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: explain +select key, data[key] from map_table +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, data[key] from map_table +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: map_table + Statistics: Num rows: 4 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), data[key] (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 84 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select key, data[key] from map_table +PREHOOK: type: QUERY +PREHOOK: Input: default@map_table +#### A masked pattern was here #### +POSTHOOK: query: select key, data[key] from map_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@map_table +#### A masked pattern was here #### +2 two +3 three +3 three +1 one diff --git a/ql/src/test/results/clientpositive/authorization_cli_createtab_noauthzapi.q.out b/ql/src/test/results/clientpositive/authorization_cli_createtab_noauthzapi.q.out new file mode 100644 index 0000000..1375dfa --- /dev/null +++ b/ql/src/test/results/clientpositive/authorization_cli_createtab_noauthzapi.q.out @@ -0,0 +1,22 @@ +PREHOOK: query: -- verify that sql std auth can be set as the authorizer with hive cli, while metastore authorization api calls are disabled (for cli) + +create table t_cli(i int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t_cli +POSTHOOK: query: -- verify that sql std auth can be set as the authorizer with hive cli, while metastore authorization api calls are disabled (for cli) + +create table t_cli(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_cli +PREHOOK: query: create view v_cli (i) as select i from t_cli +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@t_cli +PREHOOK: Output: database:default +PREHOOK: Output: default@v_cli +POSTHOOK: query: create view v_cli (i) as select i from t_cli +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@t_cli +POSTHOOK: Output: database:default +POSTHOOK: Output: default@v_cli diff --git a/ql/src/test/results/clientpositive/authorization_cli_nonsql.q.out b/ql/src/test/results/clientpositive/authorization_cli_nonsql.q.out new file mode 100644 index 0000000..ded382b --- /dev/null +++ b/ql/src/test/results/clientpositive/authorization_cli_nonsql.q.out @@ -0,0 +1,51 @@ +PREHOOK: query: -- Verify that dfs,compile,add,delete commands can be run from hive cli, and no authorization checks happen when auth is diabled + +use default +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:default +POSTHOOK: query: -- Verify that dfs,compile,add,delete commands can be run from hive cli, and no authorization checks happen when auth is diabled + +use default +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:default +PREHOOK: query: create table a_table1(a int, b int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a_table1 +POSTHOOK: query: create table a_table1(a int, b int) +POSTHOOK: type: CREATETABLE 
+POSTHOOK: Output: database:default +POSTHOOK: Output: default@a_table1 +PREHOOK: query: alter table a_table1 set serde 'org.apache.hadoop.hive.serde2.TestSerDe' with serdeproperties('s1'='9') +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@a_table1 +PREHOOK: Output: default@a_table1 +POSTHOOK: query: alter table a_table1 set serde 'org.apache.hadoop.hive.serde2.TestSerDe' with serdeproperties('s1'='9') +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@a_table1 +POSTHOOK: Output: default@a_table1 +PREHOOK: query: drop table a_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table a_table +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TEMPORARY FUNCTION Pyth as 'Pyth' +PREHOOK: type: CREATEFUNCTION +PREHOOK: Output: pyth +POSTHOOK: query: CREATE TEMPORARY FUNCTION Pyth as 'Pyth' +POSTHOOK: type: CREATEFUNCTION +POSTHOOK: Output: pyth +PREHOOK: query: SELECT Pyth(3,4) FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT Pyth(3,4) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +5.0 +PREHOOK: query: DROP TEMPORARY FUNCTION Pyth +PREHOOK: type: DROPFUNCTION +PREHOOK: Output: Pyth +POSTHOOK: query: DROP TEMPORARY FUNCTION Pyth +POSTHOOK: type: DROPFUNCTION +POSTHOOK: Output: Pyth diff --git a/ql/src/test/results/clientpositive/authorization_cli_stdconfigauth.q.out b/ql/src/test/results/clientpositive/authorization_cli_stdconfigauth.q.out new file mode 100644 index 0000000..a70b2bc --- /dev/null +++ b/ql/src/test/results/clientpositive/authorization_cli_stdconfigauth.q.out @@ -0,0 +1,21 @@ +PREHOOK: query: -- verify that SQLStdConfOnlyAuthorizerFactory as the authorizer factory with hive cli, with hive.security.authorization.enabled=true +-- authorization verification would be just no-op + +create table t_cli(i int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t_cli +POSTHOOK: query: -- verify that SQLStdConfOnlyAuthorizerFactory as the authorizer factory with hive cli, with hive.security.authorization.enabled=true +-- authorization verification would be just no-op + +create table t_cli(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_cli +PREHOOK: query: describe t_cli +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@t_cli +POSTHOOK: query: describe t_cli +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@t_cli +i int diff --git a/ql/src/test/results/clientpositive/authorization_delete.q.out b/ql/src/test/results/clientpositive/authorization_delete.q.out new file mode 100644 index 0000000..9aa4600 --- /dev/null +++ b/ql/src/test/results/clientpositive/authorization_delete.q.out @@ -0,0 +1,48 @@ +PREHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) + +CREATE TABLE t_auth_del(i int) clustered by (i) into 2 buckets stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t_auth_del +POSTHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) 
+ +CREATE TABLE t_auth_del(i int) clustered by (i) into 2 buckets stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_auth_del +PREHOOK: query: -- grant update privilege to another user +GRANT DELETE ON t_auth_del TO USER userWIns +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@t_auth_del +POSTHOOK: query: -- grant update privilege to another user +GRANT DELETE ON t_auth_del TO USER userWIns +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@t_auth_del +PREHOOK: query: GRANT SELECT ON t_auth_del TO USER userWIns +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@t_auth_del +POSTHOOK: query: GRANT SELECT ON t_auth_del TO USER userWIns +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@t_auth_del +PREHOOK: query: set role admin +PREHOOK: type: SHOW_ROLES +POSTHOOK: query: set role admin +POSTHOOK: type: SHOW_ROLES +PREHOOK: query: SHOW GRANT ON TABLE t_auth_del +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: SHOW GRANT ON TABLE t_auth_del +POSTHOOK: type: SHOW_GRANT +default t_auth_del user1 USER DELETE true -1 user1 +default t_auth_del user1 USER INSERT true -1 user1 +default t_auth_del user1 USER SELECT true -1 user1 +default t_auth_del user1 USER UPDATE true -1 user1 +default t_auth_del userWIns USER DELETE false -1 user1 +default t_auth_del userWIns USER SELECT false -1 user1 +PREHOOK: query: delete from t_auth_del where i > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_auth_del +PREHOOK: Output: default@t_auth_del +POSTHOOK: query: delete from t_auth_del where i > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_auth_del +POSTHOOK: Output: default@t_auth_del diff --git a/ql/src/test/results/clientpositive/authorization_delete_own_table.q.out b/ql/src/test/results/clientpositive/authorization_delete_own_table.q.out new file mode 100644 index 0000000..1e0f9c8 --- /dev/null +++ b/ql/src/test/results/clientpositive/authorization_delete_own_table.q.out @@ -0,0 +1,20 @@ +PREHOOK: query: create table auth_noupd(i int) clustered by (i) into 2 buckets stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@auth_noupd +POSTHOOK: query: create table auth_noupd(i int) clustered by (i) into 2 buckets stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@auth_noupd +PREHOOK: query: delete from auth_noupd where i > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@auth_noupd +PREHOOK: Output: default@auth_noupd +POSTHOOK: query: delete from auth_noupd where i > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@auth_noupd +POSTHOOK: Output: default@auth_noupd +PREHOOK: query: set role admin +PREHOOK: type: SHOW_ROLES +POSTHOOK: query: set role admin +POSTHOOK: type: SHOW_ROLES diff --git a/ql/src/test/results/clientpositive/authorization_non_id.q.out b/ql/src/test/results/clientpositive/authorization_non_id.q.out new file mode 100644 index 0000000..9b81d22 --- /dev/null +++ b/ql/src/test/results/clientpositive/authorization_non_id.q.out @@ -0,0 +1,74 @@ +PREHOOK: query: set role ADMIN +PREHOOK: type: SHOW_ROLES +POSTHOOK: query: set role ADMIN +POSTHOOK: type: SHOW_ROLES +PREHOOK: query: drop table if exists src_autho_test +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists src_autho_test +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src_autho_test (id int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_autho_test +POSTHOOK: query: create 
table src_autho_test (id int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_autho_test +PREHOOK: query: create role src_role2 +PREHOOK: type: CREATEROLE +POSTHOOK: query: create role src_role2 +POSTHOOK: type: CREATEROLE +PREHOOK: query: grant role src_role2 to user bar +PREHOOK: type: GRANT_ROLE +POSTHOOK: query: grant role src_role2 to user bar +POSTHOOK: type: GRANT_ROLE +PREHOOK: query: grant role src_role2 to user `foo-1` +PREHOOK: type: GRANT_ROLE +POSTHOOK: query: grant role src_role2 to user `foo-1` +POSTHOOK: type: GRANT_ROLE +PREHOOK: query: show role grant user bar +PREHOOK: type: SHOW_ROLE_GRANT +POSTHOOK: query: show role grant user bar +POSTHOOK: type: SHOW_ROLE_GRANT +public false -1 +src_role2 false -1 hive_admin_user +PREHOOK: query: show role grant user `foo-1` +PREHOOK: type: SHOW_ROLE_GRANT +POSTHOOK: query: show role grant user `foo-1` +POSTHOOK: type: SHOW_ROLE_GRANT +public false -1 +src_role2 false -1 hive_admin_user +PREHOOK: query: grant select on table src_autho_test to user bar +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@src_autho_test +POSTHOOK: query: grant select on table src_autho_test to user bar +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@src_autho_test +PREHOOK: query: grant select on table src_autho_test to user `foo-1` +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@src_autho_test +POSTHOOK: query: grant select on table src_autho_test to user `foo-1` +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@src_autho_test +PREHOOK: query: show grant user bar on all +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: show grant user bar on all +POSTHOOK: type: SHOW_GRANT +default src_autho_test bar USER SELECT false -1 hive_admin_user +PREHOOK: query: show grant user `foo-1` on all +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: show grant user `foo-1` on all +POSTHOOK: type: SHOW_GRANT +default src_autho_test foo-1 USER SELECT false -1 hive_admin_user +PREHOOK: query: drop table src_autho_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_autho_test +PREHOOK: Output: default@src_autho_test +POSTHOOK: query: drop table src_autho_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_autho_test +POSTHOOK: Output: default@src_autho_test +PREHOOK: query: drop role src_role2 +PREHOOK: type: DROPROLE +POSTHOOK: query: drop role src_role2 +POSTHOOK: type: DROPROLE diff --git a/ql/src/test/results/clientpositive/authorization_update.q.out b/ql/src/test/results/clientpositive/authorization_update.q.out new file mode 100644 index 0000000..019d363 --- /dev/null +++ b/ql/src/test/results/clientpositive/authorization_update.q.out @@ -0,0 +1,62 @@ +PREHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) + +CREATE TABLE t_auth_up(i int) clustered by (i) into 2 buckets stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t_auth_up +POSTHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) 
+ +CREATE TABLE t_auth_up(i int) clustered by (i) into 2 buckets stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_auth_up +PREHOOK: query: CREATE TABLE t_select(i int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t_select +POSTHOOK: query: CREATE TABLE t_select(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_select +PREHOOK: query: GRANT ALL ON TABLE t_select TO ROLE public +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@t_select +POSTHOOK: query: GRANT ALL ON TABLE t_select TO ROLE public +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@t_select +PREHOOK: query: -- grant update privilege to another user +GRANT UPDATE ON t_auth_up TO USER userWIns +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@t_auth_up +POSTHOOK: query: -- grant update privilege to another user +GRANT UPDATE ON t_auth_up TO USER userWIns +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@t_auth_up +PREHOOK: query: GRANT SELECT ON t_auth_up TO USER userWIns +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@t_auth_up +POSTHOOK: query: GRANT SELECT ON t_auth_up TO USER userWIns +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@t_auth_up +PREHOOK: query: set role admin +PREHOOK: type: SHOW_ROLES +POSTHOOK: query: set role admin +POSTHOOK: type: SHOW_ROLES +PREHOOK: query: SHOW GRANT ON TABLE t_auth_up +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: SHOW GRANT ON TABLE t_auth_up +POSTHOOK: type: SHOW_GRANT +default t_auth_up user1 USER DELETE true -1 user1 +default t_auth_up user1 USER INSERT true -1 user1 +default t_auth_up user1 USER SELECT true -1 user1 +default t_auth_up user1 USER UPDATE true -1 user1 +default t_auth_up userWIns USER SELECT false -1 user1 +default t_auth_up userWIns USER UPDATE false -1 user1 +PREHOOK: query: update t_auth_up set i = 0 where i > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_auth_up +PREHOOK: Output: default@t_auth_up +POSTHOOK: query: update t_auth_up set i = 0 where i > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_auth_up +POSTHOOK: Output: default@t_auth_up diff --git a/ql/src/test/results/clientpositive/authorization_update_own_table.q.out b/ql/src/test/results/clientpositive/authorization_update_own_table.q.out new file mode 100644 index 0000000..cbf8f57 --- /dev/null +++ b/ql/src/test/results/clientpositive/authorization_update_own_table.q.out @@ -0,0 +1,20 @@ +PREHOOK: query: create table auth_noupd(i int) clustered by (i) into 2 buckets stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@auth_noupd +POSTHOOK: query: create table auth_noupd(i int) clustered by (i) into 2 buckets stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@auth_noupd +PREHOOK: query: update auth_noupd set i = 0 where i > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@auth_noupd +PREHOOK: Output: default@auth_noupd +POSTHOOK: query: update auth_noupd set i = 0 where i > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@auth_noupd +POSTHOOK: Output: default@auth_noupd +PREHOOK: query: set role admin +PREHOOK: type: SHOW_ROLES +POSTHOOK: query: set role admin +POSTHOOK: type: SHOW_ROLES diff --git a/ql/src/test/results/clientpositive/drop_index.q.out b/ql/src/test/results/clientpositive/drop_index.q.out index 71afdff..dc154d3 100644 --- 
a/ql/src/test/results/clientpositive/drop_index.q.out +++ b/ql/src/test/results/clientpositive/drop_index.q.out @@ -4,3 +4,7 @@ PREHOOK: Input: default@src POSTHOOK: query: DROP INDEX IF EXISTS UnknownIndex ON src POSTHOOK: type: DROPINDEX POSTHOOK: Input: default@src +PREHOOK: query: DROP INDEX IF EXISTS UnknownIndex ON UnknownTable +PREHOOK: type: DROPINDEX +POSTHOOK: query: DROP INDEX IF EXISTS UnknownIndex ON UnknownTable +POSTHOOK: type: DROPINDEX diff --git a/ql/src/test/results/clientpositive/escape3.q.out b/ql/src/test/results/clientpositive/escape3.q.out new file mode 100644 index 0000000..dc53583 --- /dev/null +++ b/ql/src/test/results/clientpositive/escape3.q.out @@ -0,0 +1,165 @@ +PREHOOK: query: -- with string +CREATE TABLE escape3_1 +( +GERUND STRING, +ABBREV STRING, +CODE SMALLINT +) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' ESCAPED BY '\134' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@escape3_1 +POSTHOOK: query: -- with string +CREATE TABLE escape3_1 +( +GERUND STRING, +ABBREV STRING, +CODE SMALLINT +) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' ESCAPED BY '\134' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@escape3_1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/data_with_escape.txt' INTO TABLE escape3_1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@escape3_1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/data_with_escape.txt' INTO TABLE escape3_1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@escape3_1 +PREHOOK: query: select * from escape3_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@escape3_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from escape3_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@escape3_1 +#### A masked pattern was here #### +re|ading V|A 100 +writ|ing MD 200 +w|aiting |NC 300 +seein|g TN| 400 +runn|ing WV 500 +PREHOOK: query: -- with varchar +CREATE TABLE escape3_2 +( +GERUND VARCHAR(10), +ABBREV VARCHAR(3), +CODE SMALLINT +) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' ESCAPED BY '\134' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@escape3_2 +POSTHOOK: query: -- with varchar +CREATE TABLE escape3_2 +( +GERUND VARCHAR(10), +ABBREV VARCHAR(3), +CODE SMALLINT +) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' ESCAPED BY '\134' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@escape3_2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/data_with_escape.txt' INTO TABLE escape3_2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@escape3_2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/data_with_escape.txt' INTO TABLE escape3_2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@escape3_2 +PREHOOK: query: select * from escape3_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@escape3_2 +#### A masked pattern was here #### +POSTHOOK: query: select * from escape3_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@escape3_2 +#### A masked pattern was here #### +re|ading V|A 100 +writ|ing MD 200 +w|aiting |NC 300 +seein|g TN| 400 +runn|ing WV 500 +PREHOOK: query: -- with char +CREATE TABLE escape3_3 +( +GERUND CHAR(10), +ABBREV CHAR(3), +CODE SMALLINT +) +ROW FORMAT DELIMITED +FIELDS 
TERMINATED BY '|' ESCAPED BY '\134' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@escape3_3 +POSTHOOK: query: -- with char +CREATE TABLE escape3_3 +( +GERUND CHAR(10), +ABBREV CHAR(3), +CODE SMALLINT +) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' ESCAPED BY '\134' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@escape3_3 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/data_with_escape.txt' INTO TABLE escape3_3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@escape3_3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/data_with_escape.txt' INTO TABLE escape3_3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@escape3_3 +PREHOOK: query: select * from escape3_3 +PREHOOK: type: QUERY +PREHOOK: Input: default@escape3_3 +#### A masked pattern was here #### +POSTHOOK: query: select * from escape3_3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@escape3_3 +#### A masked pattern was here #### +re|ading V|A 100 +writ|ing MD 200 +w|aiting |NC 300 +seein|g TN| 400 +runn|ing WV 500 +PREHOOK: query: DROP TABLE escape3_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@escape3_1 +PREHOOK: Output: default@escape3_1 +POSTHOOK: query: DROP TABLE escape3_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@escape3_1 +POSTHOOK: Output: default@escape3_1 +PREHOOK: query: DROP TABLE escape3_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@escape3_2 +PREHOOK: Output: default@escape3_2 +POSTHOOK: query: DROP TABLE escape3_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@escape3_2 +POSTHOOK: Output: default@escape3_2 +PREHOOK: query: DROP TABLE escape3_3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@escape3_3 +PREHOOK: Output: default@escape3_3 +POSTHOOK: query: DROP TABLE escape3_3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@escape3_3 +POSTHOOK: Output: default@escape3_3 diff --git a/ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out b/ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out index f556c97..664e065 100644 --- a/ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out +++ b/ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out @@ -188,6 +188,26 @@ POSTHOOK: query: select count(*), count(1), sum(1), sum(2), count(s), count(bo), POSTHOOK: type: QUERY #### A masked pattern was here #### 2219 2219 2219 4438 2219 2219 2219 2219 65791 4294967296 99.95999908447266 0.04 +PREHOOK: query: select count(*) from stats_tbl_part +PREHOOK: type: QUERY +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from stats_tbl_part +POSTHOOK: type: QUERY +#### A masked pattern was here #### +4541 +PREHOOK: query: select count(*)/2 from stats_tbl_part +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_tbl_part +PREHOOK: Input: default@stats_tbl_part@dt=2010 +PREHOOK: Input: default@stats_tbl_part@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: select count(*)/2 from stats_tbl_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_tbl_part +POSTHOOK: Input: default@stats_tbl_part@dt=2010 +POSTHOOK: Input: default@stats_tbl_part@dt=2014 +#### A masked pattern was here #### +2270.5 PREHOOK: query: drop table stats_tbl_part PREHOOK: type: DROPTABLE PREHOOK: Input: default@stats_tbl_part diff --git a/ql/src/test/results/clientpositive/serde_opencsv.q.out 
b/ql/src/test/results/clientpositive/serde_opencsv.q.out new file mode 100644 index 0000000..230c475 --- /dev/null +++ b/ql/src/test/results/clientpositive/serde_opencsv.q.out @@ -0,0 +1,104 @@ +PREHOOK: query: EXPLAIN +CREATE TABLE serde_opencsv( + words STRING, + int1 INT, + tinyint1 TINYINT, + smallint1 SMALLINT, + bigint1 BIGINT, + boolean1 BOOLEAN, + float1 FLOAT, + double1 DOUBLE) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' +WITH SERDEPROPERTIES( + "separatorChar" = ",", + "quoteChar" = "'", + "escapeChar" = "\\" +) stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: EXPLAIN +CREATE TABLE serde_opencsv( + words STRING, + int1 INT, + tinyint1 TINYINT, + smallint1 SMALLINT, + bigint1 BIGINT, + boolean1 BOOLEAN, + float1 FLOAT, + double1 DOUBLE) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' +WITH SERDEPROPERTIES( + "separatorChar" = ",", + "quoteChar" = "'", + "escapeChar" = "\\" +) stored as textfile +POSTHOOK: type: CREATETABLE +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Create Table Operator: + Create Table + columns: words string, int1 int, tinyint1 tinyint, smallint1 smallint, bigint1 bigint, boolean1 boolean, float1 float, double1 double + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde name: org.apache.hadoop.hive.serde2.OpenCSVSerde + serde properties: + escapeChar \ + quoteChar ' + separatorChar , + name: default.serde_opencsv + +PREHOOK: query: CREATE TABLE serde_opencsv( + words STRING, + int1 INT, + tinyint1 TINYINT, + smallint1 SMALLINT, + bigint1 BIGINT, + boolean1 BOOLEAN, + float1 FLOAT, + double1 DOUBLE) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' +WITH SERDEPROPERTIES( + "separatorChar" = ",", + "quoteChar" = "'", + "escapeChar" = "\\" +) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@serde_opencsv +POSTHOOK: query: CREATE TABLE serde_opencsv( + words STRING, + int1 INT, + tinyint1 TINYINT, + smallint1 SMALLINT, + bigint1 BIGINT, + boolean1 BOOLEAN, + float1 FLOAT, + double1 DOUBLE) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' +WITH SERDEPROPERTIES( + "separatorChar" = ",", + "quoteChar" = "'", + "escapeChar" = "\\" +) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@serde_opencsv +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/opencsv-data.txt" INTO TABLE serde_opencsv +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@serde_opencsv +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/opencsv-data.txt" INTO TABLE serde_opencsv +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@serde_opencsv +PREHOOK: query: SELECT count(*) FROM serde_opencsv +PREHOOK: type: QUERY +PREHOOK: Input: default@serde_opencsv +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) FROM serde_opencsv +POSTHOOK: type: QUERY +POSTHOOK: Input: default@serde_opencsv +#### A masked pattern was here #### +3 diff --git a/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out index 9fcb4f4..3c1a99f 100644 --- a/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out @@ -6618,3 +6618,623 @@ 
POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### false 11.0 -11.0 -2.389090909090909 -17881597706 -1.7881597716175E10 3.8953387713327066E17 6.0 -0.8249999999999993 -2454.8879999999995 3.8953385925167296E17 -2145884705 1.66288903197104486E18 0.8249999999999993 4.7840233756130287E-17 4.098424268084119E-17 0.8249999999999993 -1051696618 28.692556844886422 2.980633855245E9 -4.032330473245E9 85.79562278396777 4.032330473245E9 -3983699.3106060605 3983699.3106060605 4.1896430920933255E15 true 79.553 -79.553 -0.33034580136836733 -401322621137 -4.01322621147175E11 7.9255373737244976E16 34.727455139160156 -69.3780014038086 4856.6352637899645 7.9254972414623824E16 -2130544867 2.30133924842409523E18 69.3780014038086 3.456813247089758E-17 2.0387240975807185E-18 69.3780014038086 2182477964777 34.654968050508266 2.959326820263E9 2.179518637956737E12 9461.197516216069 -2.179518637956737E12 4.592756659884259E8 -4.592756659884259E8 1.002359020778021E21 +PREHOOK: query: -- These tests verify COUNT on empty or null colulmns work correctly. +create table test_count(i int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_count +POSTHOOK: query: -- These tests verify COUNT on empty or null colulmns work correctly. +create table test_count(i int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_count +PREHOOK: query: explain +select count(*) from test_count +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from test_count +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_count + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from test_count +PREHOOK: type: QUERY +PREHOOK: Input: default@test_count +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_count +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_count +#### A masked pattern was here #### +0 +PREHOOK: 
query: explain +select count(i) from test_count +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(i) from test_count +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_count + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: i (type: int) + outputColumnNames: i + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(i) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(i) from test_count +PREHOOK: type: QUERY +PREHOOK: Input: default@test_count +#### A masked pattern was here #### +POSTHOOK: query: select count(i) from test_count +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_count +#### A masked pattern was here #### +0 +PREHOOK: query: create table alltypesnull like alltypesorc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alltypesnull +POSTHOOK: query: create table alltypesnull like alltypesorc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alltypesnull +PREHOOK: query: alter table alltypesnull set fileformat textfile +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@alltypesnull +PREHOOK: Output: default@alltypesnull +POSTHOOK: query: alter table alltypesnull set fileformat textfile +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@alltypesnull +POSTHOOK: Output: default@alltypesnull +PREHOOK: query: insert into table alltypesnull select null, null, null, null, null, null, null, null, null, null, null, null from alltypesorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@alltypesnull +POSTHOOK: query: insert into table alltypesnull select null, null, null, null, null, null, null, null, null, null, null, null from alltypesorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@alltypesnull +POSTHOOK: Lineage: alltypesnull.cbigint EXPRESSION [] +POSTHOOK: Lineage: alltypesnull.cboolean1 EXPRESSION [] +POSTHOOK: Lineage: alltypesnull.cboolean2 EXPRESSION [] +POSTHOOK: Lineage: alltypesnull.cdouble EXPRESSION [] +POSTHOOK: Lineage: 
alltypesnull.cfloat EXPRESSION [] +POSTHOOK: Lineage: alltypesnull.cint EXPRESSION [] +POSTHOOK: Lineage: alltypesnull.csmallint EXPRESSION [] +POSTHOOK: Lineage: alltypesnull.cstring1 SIMPLE [] +POSTHOOK: Lineage: alltypesnull.cstring2 SIMPLE [] +POSTHOOK: Lineage: alltypesnull.ctimestamp1 EXPRESSION [] +POSTHOOK: Lineage: alltypesnull.ctimestamp2 EXPRESSION [] +POSTHOOK: Lineage: alltypesnull.ctinyint EXPRESSION [] +PREHOOK: query: create table alltypesnullorc stored as orc as select * from alltypesnull +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@alltypesnull +PREHOOK: Output: database:default +PREHOOK: Output: default@alltypesnullorc +POSTHOOK: query: create table alltypesnullorc stored as orc as select * from alltypesnull +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@alltypesnull +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alltypesnullorc +PREHOOK: query: explain +select count(*) from alltypesnullorc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from alltypesnullorc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesnullorc + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from alltypesnullorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from alltypesnullorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +12288 +PREHOOK: query: explain +select count(ctinyint) from alltypesnullorc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(ctinyint) from alltypesnullorc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesnullorc + Statistics: Num 
rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: ctinyint + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(ctinyint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(ctinyint) from alltypesnullorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +POSTHOOK: query: select count(ctinyint) from alltypesnullorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +0 +PREHOOK: query: explain +select count(cint) from alltypesnullorc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(cint) from alltypesnullorc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesnullorc + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: cint + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(cint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + 
Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(cint) from alltypesnullorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +POSTHOOK: query: select count(cint) from alltypesnullorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +0 +PREHOOK: query: explain +select count(cfloat) from alltypesnullorc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(cfloat) from alltypesnullorc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesnullorc + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cfloat (type: float) + outputColumnNames: cfloat + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(cfloat) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(cfloat) from alltypesnullorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +POSTHOOK: query: select count(cfloat) from alltypesnullorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +0 +PREHOOK: query: explain +select count(cstring1) from alltypesnullorc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(cstring1) from alltypesnullorc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesnullorc + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cstring1 (type: string) + outputColumnNames: cstring1 + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(cstring1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(cstring1) from alltypesnullorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +POSTHOOK: query: select count(cstring1) from alltypesnullorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +0 +PREHOOK: query: explain +select count(cboolean1) from alltypesnullorc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(cboolean1) from alltypesnullorc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesnullorc + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cboolean1 (type: boolean) + outputColumnNames: cboolean1 + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(cboolean1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(cboolean1) from alltypesnullorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +POSTHOOK: query: select count(cboolean1) from alltypesnullorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +0 diff --git 
a/ql/src/test/results/clientpositive/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/vectorization_short_regress.q.out index 1ca9ab9..8c4d1a4 100644 --- a/ql/src/test/results/clientpositive/vectorization_short_regress.q.out +++ b/ql/src/test/results/clientpositive/vectorization_short_regress.q.out @@ -6615,3 +6615,567 @@ POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### false 11.0 -11.0 -2.389090909090909 -17881597706 -1.7881597716175E10 3.8953387713327066E17 6.0 -0.8249999999999993 -2454.8879999999995 3.8953385925167296E17 -2145884705 1.66288903197104486E18 0.8249999999999993 4.7840233756130287E-17 4.098424268084119E-17 0.8249999999999993 -1051696618 28.692556844886422 2.980633855245E9 -4.032330473245E9 85.79562278396777 4.032330473245E9 -3983699.3106060605 3983699.3106060605 4.1896430920933255E15 true 79.553 -79.553 -0.33034580136836733 -401322621137 -4.01322621147175E11 7.9255373737244976E16 34.727455139160156 -69.3780014038086 4856.6352637899645 7.9254972414623824E16 -2130544867 2.30133924842409523E18 69.3780014038086 3.456813247089758E-17 2.0387240975807185E-18 69.3780014038086 2182477964777 34.654968050508266 2.959326820263E9 2.179518637956737E12 9461.197516216069 -2.179518637956737E12 4.592756659884259E8 -4.592756659884259E8 1.002359020778021E21 +PREHOOK: query: -- These tests verify COUNT on empty or null colulmns work correctly. +create table test_count(i int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_count +POSTHOOK: query: -- These tests verify COUNT on empty or null colulmns work correctly. +create table test_count(i int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_count +PREHOOK: query: explain +select count(*) from test_count +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from test_count +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_count + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from test_count +PREHOOK: type: QUERY +PREHOOK: Input: 
default@test_count +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_count +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_count +#### A masked pattern was here #### +0 +PREHOOK: query: explain +select count(i) from test_count +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(i) from test_count +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_count + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: i (type: int) + outputColumnNames: i + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(i) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(i) from test_count +PREHOOK: type: QUERY +PREHOOK: Input: default@test_count +#### A masked pattern was here #### +POSTHOOK: query: select count(i) from test_count +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_count +#### A masked pattern was here #### +0 +PREHOOK: query: create table alltypesnull like alltypesorc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alltypesnull +POSTHOOK: query: create table alltypesnull like alltypesorc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alltypesnull +PREHOOK: query: alter table alltypesnull set fileformat textfile +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@alltypesnull +PREHOOK: Output: default@alltypesnull +POSTHOOK: query: alter table alltypesnull set fileformat textfile +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@alltypesnull +POSTHOOK: Output: default@alltypesnull +PREHOOK: query: insert into table alltypesnull select null, null, null, null, null, null, null, null, null, null, null, null from alltypesorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@alltypesnull +POSTHOOK: query: insert into table alltypesnull select null, null, null, null, null, null, null, null, null, null, null, null from alltypesorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@alltypesnull +POSTHOOK: Lineage: alltypesnull.cbigint EXPRESSION [] +POSTHOOK: Lineage: alltypesnull.cboolean1 EXPRESSION [] +POSTHOOK: Lineage: alltypesnull.cboolean2 EXPRESSION [] +POSTHOOK: 
Lineage: alltypesnull.cdouble EXPRESSION [] +POSTHOOK: Lineage: alltypesnull.cfloat EXPRESSION [] +POSTHOOK: Lineage: alltypesnull.cint EXPRESSION [] +POSTHOOK: Lineage: alltypesnull.csmallint EXPRESSION [] +POSTHOOK: Lineage: alltypesnull.cstring1 SIMPLE [] +POSTHOOK: Lineage: alltypesnull.cstring2 SIMPLE [] +POSTHOOK: Lineage: alltypesnull.ctimestamp1 EXPRESSION [] +POSTHOOK: Lineage: alltypesnull.ctimestamp2 EXPRESSION [] +POSTHOOK: Lineage: alltypesnull.ctinyint EXPRESSION [] +PREHOOK: query: create table alltypesnullorc stored as orc as select * from alltypesnull +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@alltypesnull +PREHOOK: Output: database:default +PREHOOK: Output: default@alltypesnullorc +POSTHOOK: query: create table alltypesnullorc stored as orc as select * from alltypesnull +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@alltypesnull +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alltypesnullorc +PREHOOK: query: explain +select count(*) from alltypesnullorc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from alltypesnullorc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesnullorc + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from alltypesnullorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from alltypesnullorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +12288 +PREHOOK: query: explain +select count(ctinyint) from alltypesnullorc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(ctinyint) from alltypesnullorc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesnullorc + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: ctinyint + 
Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(ctinyint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(ctinyint) from alltypesnullorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +POSTHOOK: query: select count(ctinyint) from alltypesnullorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +0 +PREHOOK: query: explain +select count(cint) from alltypesnullorc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(cint) from alltypesnullorc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesnullorc + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: cint + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(cint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(cint) from alltypesnullorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +POSTHOOK: query: select count(cint) from alltypesnullorc +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@alltypesnullorc +#### A masked pattern was here #### +0 +PREHOOK: query: explain +select count(cfloat) from alltypesnullorc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(cfloat) from alltypesnullorc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesnullorc + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cfloat (type: float) + outputColumnNames: cfloat + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(cfloat) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(cfloat) from alltypesnullorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +POSTHOOK: query: select count(cfloat) from alltypesnullorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +0 +PREHOOK: query: explain +select count(cstring1) from alltypesnullorc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(cstring1) from alltypesnullorc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesnullorc + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cstring1 (type: string) + outputColumnNames: cstring1 + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(cstring1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(cstring1) from alltypesnullorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +POSTHOOK: query: select count(cstring1) from alltypesnullorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +0 +PREHOOK: query: explain +select count(cboolean1) from alltypesnullorc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(cboolean1) from alltypesnullorc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesnullorc + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cboolean1 (type: boolean) + outputColumnNames: cboolean1 + Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(cboolean1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(cboolean1) from alltypesnullorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +POSTHOOK: query: select count(cboolean1) from alltypesnullorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesnullorc +#### A masked pattern was here #### +0 diff --git a/ql/src/test/results/compiler/errors/invalid_list_index.q.out b/ql/src/test/results/compiler/errors/invalid_list_index.q.out deleted file mode 100644 index a4179cd..0000000 --- a/ql/src/test/results/compiler/errors/invalid_list_index.q.out +++ /dev/null @@ -1,2 +0,0 @@ -Semantic Exception: -2:74 Non-constant expressions for array indexes not supported. 
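
The plans above all take the vectorized path, and the results pin down the NULL semantics being exercised: count(*) over alltypesnullorc returns 12288, while count(col) over the all-NULL columns returns 0. A minimal standalone sketch of that distinction, in plain Java rather than Hive's vectorized operators (class and method names here are illustrative only):

// Illustration (not Hive code): COUNT(col) skips NULLs, COUNT(*) counts rows regardless.
public class CountNullSemantics {
  static long countColumn(Integer[] column) {
    long c = 0;
    for (Integer v : column) {
      if (v != null) {
        c++;
      }
    }
    return c;
  }

  static long countStar(Integer[] column) {
    return column.length;
  }

  public static void main(String[] args) {
    Integer[] allNulls = new Integer[12288];    // mirrors alltypesnullorc: every value NULL
    System.out.println(countStar(allNulls));    // 12288, like count(*)
    System.out.println(countColumn(allNulls));  // 0, like count(ctinyint), count(cint), ...
  }
}
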
Error encountered near token ''abc'' \ No newline at end of file diff --git a/ql/src/test/results/compiler/errors/invalid_list_index2.q.out b/ql/src/test/results/compiler/errors/invalid_list_index2.q.out deleted file mode 100644 index aaa9455..0000000 --- a/ql/src/test/results/compiler/errors/invalid_list_index2.q.out +++ /dev/null @@ -1,2 +0,0 @@ -Semantic Exception: -2:74 Non-constant expressions for array indexes not supported. Error encountered near token '2' \ No newline at end of file diff --git a/ql/src/test/results/compiler/errors/invalid_map_index.q.out b/ql/src/test/results/compiler/errors/invalid_map_index.q.out deleted file mode 100644 index 4eb7afa..0000000 --- a/ql/src/test/results/compiler/errors/invalid_map_index.q.out +++ /dev/null @@ -1,2 +0,0 @@ -Semantic Exception: -Line 2:56 MAP key type does not match index expression type '0' \ No newline at end of file diff --git a/ql/src/test/results/compiler/errors/invalid_map_index2.q.out b/ql/src/test/results/compiler/errors/invalid_map_index2.q.out deleted file mode 100644 index edc9bda..0000000 --- a/ql/src/test/results/compiler/errors/invalid_map_index2.q.out +++ /dev/null @@ -1,2 +0,0 @@ -Semantic Exception: -2:80 Non-constant expression for map indexes not supported. Error encountered near token ''abc'' \ No newline at end of file diff --git a/serde/pom.xml b/serde/pom.xml index f8bcc83..9f327f0 100644 --- a/serde/pom.xml +++ b/serde/pom.xml @@ -70,7 +70,13 @@ libthrift ${libthrift.version} - + + net.sf.opencsv + opencsv + ${opencsv.version} + + + junit junit diff --git a/serde/src/gen/thrift/gen-cpp/complex_types.cpp b/serde/src/gen/thrift/gen-cpp/complex_types.cpp index f0ede2c..411e1b0 100644 --- a/serde/src/gen/thrift/gen-cpp/complex_types.cpp +++ b/serde/src/gen/thrift/gen-cpp/complex_types.cpp @@ -622,4 +622,100 @@ void swap(Complex &a, Complex &b) { swap(a.__isset, b.__isset); } +const char* SetIntString::ascii_fingerprint = "842B41C940D05DFB16183142A90DFC54"; +const uint8_t SetIntString::binary_fingerprint[16] = {0x84,0x2B,0x41,0xC9,0x40,0xD0,0x5D,0xFB,0x16,0x18,0x31,0x42,0xA9,0x0D,0xFC,0x54}; + +uint32_t SetIntString::read(::apache::thrift::protocol::TProtocol* iprot) { + + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_SET) { + { + this->sIntString.clear(); + uint32_t _size64; + ::apache::thrift::protocol::TType _etype67; + xfer += iprot->readSetBegin(_etype67, _size64); + uint32_t _i68; + for (_i68 = 0; _i68 < _size64; ++_i68) + { + IntString _elem69; + xfer += _elem69.read(iprot); + this->sIntString.insert(_elem69); + } + xfer += iprot->readSetEnd(); + } + this->__isset.sIntString = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->aString); + this->__isset.aString = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +uint32_t SetIntString::write(::apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + xfer += oprot->writeStructBegin("SetIntString"); + + xfer += 
oprot->writeFieldBegin("sIntString", ::apache::thrift::protocol::T_SET, 1); + { + xfer += oprot->writeSetBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->sIntString.size())); + std::set ::const_iterator _iter70; + for (_iter70 = this->sIntString.begin(); _iter70 != this->sIntString.end(); ++_iter70) + { + xfer += (*_iter70).write(oprot); + } + xfer += oprot->writeSetEnd(); + } + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("aString", ::apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeString(this->aString); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +void swap(SetIntString &a, SetIntString &b) { + using ::std::swap; + swap(a.sIntString, b.sIntString); + swap(a.aString, b.aString); + swap(a.__isset, b.__isset); +} + diff --git a/serde/src/gen/thrift/gen-cpp/complex_types.h b/serde/src/gen/thrift/gen-cpp/complex_types.h index de9f5f9..3f4c760 100644 --- a/serde/src/gen/thrift/gen-cpp/complex_types.h +++ b/serde/src/gen/thrift/gen-cpp/complex_types.h @@ -296,6 +296,57 @@ class Complex { void swap(Complex &a, Complex &b); +typedef struct _SetIntString__isset { + _SetIntString__isset() : sIntString(false), aString(false) {} + bool sIntString; + bool aString; +} _SetIntString__isset; + +class SetIntString { + public: + + static const char* ascii_fingerprint; // = "842B41C940D05DFB16183142A90DFC54"; + static const uint8_t binary_fingerprint[16]; // = {0x84,0x2B,0x41,0xC9,0x40,0xD0,0x5D,0xFB,0x16,0x18,0x31,0x42,0xA9,0x0D,0xFC,0x54}; + + SetIntString() : aString() { + } + + virtual ~SetIntString() throw() {} + + std::set sIntString; + std::string aString; + + _SetIntString__isset __isset; + + void __set_sIntString(const std::set & val) { + sIntString = val; + } + + void __set_aString(const std::string& val) { + aString = val; + } + + bool operator == (const SetIntString & rhs) const + { + if (!(sIntString == rhs.sIntString)) + return false; + if (!(aString == rhs.aString)) + return false; + return true; + } + bool operator != (const SetIntString &rhs) const { + return !(*this == rhs); + } + + bool operator < (const SetIntString & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + +void swap(SetIntString &a, SetIntString &b); + #endif diff --git a/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java b/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java index 5e089aa..8d3595b 100644 --- a/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java +++ b/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java @@ -37,8 +37,6 @@ public static final String SERIALIZATION_CLASS = "serialization.class"; - public static final String SERIALIZATION_TYPE = "serialization.type"; - public static final String SERIALIZATION_FORMAT = "serialization.format"; public static final String SERIALIZATION_DDL = "serialization.ddl"; diff --git a/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/Complex.java b/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/Complex.java index 41df559..07ea8b9 100644 --- a/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/Complex.java +++ b/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/Complex.java @@ -1267,7 +1267,7 @@ public void 
read(org.apache.thrift.protocol.TProtocol iprot, Complex struct) thr for (int _i28 = 0; _i28 < _map27.size; ++_i28) { String _key29; // required - String _val30; // optional + String _val30; // required _key29 = iprot.readString(); _val30 = iprot.readString(); struct.mStringString.put(_key29, _val30); @@ -1287,7 +1287,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, Complex struct) thr for (int _i32 = 0; _i32 < _map31.size; ++_i32) { String _key33; // required - Map> _val34; // optional + Map> _val34; // required _key33 = iprot.readString(); { org.apache.thrift.protocol.TMap _map35 = iprot.readMapBegin(); @@ -1295,7 +1295,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, Complex struct) thr for (int _i36 = 0; _i36 < _map35.size; ++_i36) { String _key37; // required - Map _val38; // optional + Map _val38; // required _key37 = iprot.readString(); { org.apache.thrift.protocol.TMap _map39 = iprot.readMapBegin(); @@ -1303,7 +1303,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, Complex struct) thr for (int _i40 = 0; _i40 < _map39.size; ++_i40) { String _key41; // required - PropValueUnion _val42; // optional + PropValueUnion _val42; // required _key41 = iprot.readString(); _val42 = new PropValueUnion(); _val42.read(iprot); @@ -1651,7 +1651,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, Complex struct) thro for (int _i67 = 0; _i67 < _map66.size; ++_i67) { String _key68; // required - String _val69; // optional + String _val69; // required _key68 = iprot.readString(); _val69 = iprot.readString(); struct.mStringString.put(_key68, _val69); @@ -1666,7 +1666,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, Complex struct) thro for (int _i71 = 0; _i71 < _map70.size; ++_i71) { String _key72; // required - Map> _val73; // optional + Map> _val73; // required _key72 = iprot.readString(); { org.apache.thrift.protocol.TMap _map74 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.MAP, iprot.readI32()); @@ -1674,7 +1674,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, Complex struct) thro for (int _i75 = 0; _i75 < _map74.size; ++_i75) { String _key76; // required - Map _val77; // optional + Map _val77; // required _key76 = iprot.readString(); { org.apache.thrift.protocol.TMap _map78 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); @@ -1682,7 +1682,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, Complex struct) thro for (int _i79 = 0; _i79 < _map78.size; ++_i79) { String _key80; // required - PropValueUnion _val81; // optional + PropValueUnion _val81; // required _key80 = iprot.readString(); _val81 = new PropValueUnion(); _val81.read(iprot); diff --git a/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/PropValueUnion.java b/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/PropValueUnion.java index 0a5757e..aa56dc9 100644 --- a/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/PropValueUnion.java +++ b/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/PropValueUnion.java @@ -320,7 +320,7 @@ protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol ip for (int _i4 = 0; _i4 < _map3.size; ++_i4) { String _key5; // required - String _val6; // optional + String _val6; // required _key5 = iprot.readString(); _val6 = 
iprot.readString(); unionMStringString.put(_key5, _val6); @@ -438,7 +438,7 @@ protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot for (int _i13 = 0; _i13 < _map12.size; ++_i13) { String _key14; // required - String _val15; // optional + String _val15; // required _key14 = iprot.readString(); _val15 = iprot.readString(); unionMStringString.put(_key14, _val15); diff --git a/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/SetIntString.java b/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/SetIntString.java index 9b00404..676f2b2 100644 --- a/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/SetIntString.java +++ b/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/SetIntString.java @@ -6,6 +6,7 @@ */ package org.apache.hadoop.hive.serde2.thrift.test; +import org.apache.commons.lang.builder.HashCodeBuilder; import org.apache.thrift.scheme.IScheme; import org.apache.thrift.scheme.SchemeFactory; import org.apache.thrift.scheme.StandardScheme; @@ -42,8 +43,8 @@ schemes.put(TupleScheme.class, new SetIntStringTupleSchemeFactory()); } - public Set sIntString; // required - public String aString; // required + private Set sIntString; // required + private String aString; // required /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ public enum _Fields implements org.apache.thrift.TFieldIdEnum { @@ -176,9 +177,8 @@ public void addToSIntString(IntString elem) { return this.sIntString; } - public SetIntString setSIntString(Set sIntString) { + public void setSIntString(Set sIntString) { this.sIntString = sIntString; - return this; } public void unsetSIntString() { @@ -200,9 +200,8 @@ public String getAString() { return this.aString; } - public SetIntString setAString(String aString) { + public void setAString(String aString) { this.aString = aString; - return this; } public void unsetAString() { @@ -304,7 +303,19 @@ public boolean equals(SetIntString that) { @Override public int hashCode() { - return 0; + HashCodeBuilder builder = new HashCodeBuilder(); + + boolean present_sIntString = true && (isSetSIntString()); + builder.append(present_sIntString); + if (present_sIntString) + builder.append(sIntString); + + boolean present_aString = true && (isSetAString()); + builder.append(present_aString); + if (present_aString) + builder.append(aString); + + return builder.toHashCode(); } public int compareTo(SetIntString other) { @@ -416,14 +427,14 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, SetIntString struct case 1: // S_INT_STRING if (schemeField.type == org.apache.thrift.protocol.TType.SET) { { - org.apache.thrift.protocol.TSet _set34 = iprot.readSetBegin(); - struct.sIntString = new HashSet(2*_set34.size); - for (int _i35 = 0; _i35 < _set34.size; ++_i35) + org.apache.thrift.protocol.TSet _set82 = iprot.readSetBegin(); + struct.sIntString = new HashSet(2*_set82.size); + for (int _i83 = 0; _i83 < _set82.size; ++_i83) { - IntString _elem36; // required - _elem36 = new IntString(); - _elem36.read(iprot); - struct.sIntString.add(_elem36); + IntString _elem84; // required + _elem84 = new IntString(); + _elem84.read(iprot); + struct.sIntString.add(_elem84); } iprot.readSetEnd(); } @@ -446,8 +457,6 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, SetIntString struct iprot.readFieldEnd(); } iprot.readStructEnd(); - - // check for required fields of primitive type, which can't be checked 
in the validate method struct.validate(); } @@ -459,9 +468,9 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, SetIntString struc oprot.writeFieldBegin(S_INT_STRING_FIELD_DESC); { oprot.writeSetBegin(new org.apache.thrift.protocol.TSet(org.apache.thrift.protocol.TType.STRUCT, struct.sIntString.size())); - for (IntString _iter37 : struct.sIntString) + for (IntString _iter85 : struct.sIntString) { - _iter37.write(oprot); + _iter85.write(oprot); } oprot.writeSetEnd(); } @@ -500,9 +509,9 @@ public void write(org.apache.thrift.protocol.TProtocol prot, SetIntString struct if (struct.isSetSIntString()) { { oprot.writeI32(struct.sIntString.size()); - for (IntString _iter38 : struct.sIntString) + for (IntString _iter86 : struct.sIntString) { - _iter38.write(oprot); + _iter86.write(oprot); } } } @@ -517,14 +526,14 @@ public void read(org.apache.thrift.protocol.TProtocol prot, SetIntString struct) BitSet incoming = iprot.readBitSet(2); if (incoming.get(0)) { { - org.apache.thrift.protocol.TSet _set39 = new org.apache.thrift.protocol.TSet(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.sIntString = new HashSet(2*_set39.size); - for (int _i40 = 0; _i40 < _set39.size; ++_i40) + org.apache.thrift.protocol.TSet _set87 = new org.apache.thrift.protocol.TSet(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); + struct.sIntString = new HashSet(2*_set87.size); + for (int _i88 = 0; _i88 < _set87.size; ++_i88) { - IntString _elem41; // required - _elem41 = new IntString(); - _elem41.read(iprot); - struct.sIntString.add(_elem41); + IntString _elem89; // required + _elem89 = new IntString(); + _elem89.read(iprot); + struct.sIntString.add(_elem89); } } struct.setSIntStringIsSet(true); diff --git a/serde/src/gen/thrift/gen-py/complex/ttypes.py b/serde/src/gen/thrift/gen-py/complex/ttypes.py index 3bc7a6f..c2748d0 100644 --- a/serde/src/gen/thrift/gen-py/complex/ttypes.py +++ b/serde/src/gen/thrift/gen-py/complex/ttypes.py @@ -487,3 +487,84 @@ def __eq__(self, other): def __ne__(self, other): return not (self == other) + +class SetIntString: + """ + Attributes: + - sIntString + - aString + """ + + thrift_spec = ( + None, # 0 + (1, TType.SET, 'sIntString', (TType.STRUCT,(IntString, IntString.thrift_spec)), None, ), # 1 + (2, TType.STRING, 'aString', None, None, ), # 2 + ) + + def __init__(self, sIntString=None, aString=None,): + self.sIntString = sIntString + self.aString = aString + + def read(self, iprot): + if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: + fastbinary.decode_binary(self, iprot.trans, (self.__class__, self.thrift_spec)) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.SET: + self.sIntString = set() + (_etype76, _size73) = iprot.readSetBegin() + for _i77 in xrange(_size73): + _elem78 = IntString() + _elem78.read(iprot) + self.sIntString.add(_elem78) + iprot.readSetEnd() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.aString = iprot.readString(); + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and self.thrift_spec is not None and fastbinary is not None: + oprot.trans.write(fastbinary.encode_binary(self, (self.__class__, 
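
The regenerated SetIntString bean above now hides its fields behind plain setters and derives hashCode from the field values via HashCodeBuilder instead of returning a constant 0. A hedged round-trip sketch of how the generated read/write methods are typically driven; TMemoryBuffer and TBinaryProtocol are standard libthrift classes, and the empty IntString element and class name are illustrative only:

import java.util.HashSet;

import org.apache.hadoop.hive.serde2.thrift.test.IntString;
import org.apache.hadoop.hive.serde2.thrift.test.SetIntString;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.transport.TMemoryBuffer;

public class SetIntStringRoundTrip {
  public static void main(String[] args) throws Exception {
    SetIntString original = new SetIntString();
    original.setSIntString(new HashSet<IntString>());
    original.addToSIntString(new IntString());
    original.setAString("hello");

    // Write the struct into an in-memory transport and read it back.
    TMemoryBuffer buffer = new TMemoryBuffer(1024);
    TBinaryProtocol protocol = new TBinaryProtocol(buffer);
    original.write(protocol);

    SetIntString copy = new SetIntString();
    copy.read(protocol);

    // The HashCodeBuilder-based hashCode now reflects the field values instead of
    // being a constant, so these structs distribute properly in HashSet/HashMap.
    System.out.println(original.equals(copy));                  // true
    System.out.println(original.hashCode() == copy.hashCode()); // true
  }
}
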
self.thrift_spec))) + return + oprot.writeStructBegin('SetIntString') + if self.sIntString is not None: + oprot.writeFieldBegin('sIntString', TType.SET, 1) + oprot.writeSetBegin(TType.STRUCT, len(self.sIntString)) + for iter79 in self.sIntString: + iter79.write(oprot) + oprot.writeSetEnd() + oprot.writeFieldEnd() + if self.aString is not None: + oprot.writeFieldBegin('aString', TType.STRING, 2) + oprot.writeString(self.aString) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.iteritems()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) diff --git a/serde/src/gen/thrift/gen-rb/complex_types.rb b/serde/src/gen/thrift/gen-rb/complex_types.rb index 2c91cbc..ffbd4d7 100644 --- a/serde/src/gen/thrift/gen-rb/complex_types.rb +++ b/serde/src/gen/thrift/gen-rb/complex_types.rb @@ -119,3 +119,21 @@ class Complex ::Thrift::Struct.generate_accessors self end +class SetIntString + include ::Thrift::Struct, ::Thrift::Struct_Union + SINTSTRING = 1 + ASTRING = 2 + + FIELDS = { + SINTSTRING => {:type => ::Thrift::Types::SET, :name => 'sIntString', :element => {:type => ::Thrift::Types::STRUCT, :class => ::IntString}}, + ASTRING => {:type => ::Thrift::Types::STRING, :name => 'aString'} + } + + def struct_fields; FIELDS; end + + def validate + end + + ::Thrift::Struct.generate_accessors self +end + diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/OpenCSVSerde.java b/serde/src/java/org/apache/hadoop/hive/serde2/OpenCSVSerde.java new file mode 100644 index 0000000..ac6c6dd --- /dev/null +++ b/serde/src/java/org/apache/hadoop/hive/serde2/OpenCSVSerde.java @@ -0,0 +1,205 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.serde2; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; + +import java.io.CharArrayReader; +import java.io.IOException; +import java.io.Reader; +import java.io.StringWriter; +import java.io.Writer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +import au.com.bytecode.opencsv.CSVReader; +import au.com.bytecode.opencsv.CSVWriter; + +/** + * OpenCSVSerde uses opencsv to deserialize CSV data. + * Users can specify a custom separator, quote or escape character. The default separator (,), + * quote ("), and escape character (") are the opencsv CSVWriter defaults. + * + */ +public final class OpenCSVSerde extends AbstractSerDe { + + public static final Log LOG = LogFactory.getLog(OpenCSVSerde.class.getName()); + private ObjectInspector inspector; + private String[] outputFields; + private int numCols; + private List<String> row; + + private char separatorChar; + private char quoteChar; + private char escapeChar; + + public static final String SEPARATORCHAR = "separatorChar"; + public static final String QUOTECHAR = "quoteChar"; + public static final String ESCAPECHAR = "escapeChar"; + + @Override + public void initialize(final Configuration conf, final Properties tbl) throws SerDeException { + + final List<String> columnNames = Arrays.asList(tbl.getProperty(serdeConstants.LIST_COLUMNS) + .split(",")); + + numCols = columnNames.size(); + + final List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(numCols); + + for (int i = 0; i < numCols; i++) { + columnOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); + } + + inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs); + outputFields = new String[numCols]; + row = new ArrayList<String>(numCols); + + for (int i = 0; i < numCols; i++) { + row.add(null); + } + + separatorChar = getProperty(tbl, SEPARATORCHAR, CSVWriter.DEFAULT_SEPARATOR); + quoteChar = getProperty(tbl, QUOTECHAR, CSVWriter.DEFAULT_QUOTE_CHARACTER); + escapeChar = getProperty(tbl, ESCAPECHAR, CSVWriter.DEFAULT_ESCAPE_CHARACTER); + } + + private char getProperty(final Properties tbl, final String property, final char def) { + final String val = tbl.getProperty(property); + + if (val != null) { + return val.charAt(0); + } + + return def; + } + + @Override + public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException { + final StructObjectInspector outputRowOI = (StructObjectInspector) objInspector; + final List<? extends StructField> outputFieldRefs = outputRowOI.getAllStructFieldRefs(); + + if (outputFieldRefs.size() != numCols) { + throw new SerDeException("Cannot serialize the object because there are " + + outputFieldRefs.size() + " fields but the table has " + numCols + " columns."); + } + + // Get all data out.
+ for (int c = 0; c < numCols; c++) { + final Object field = outputRowOI.getStructFieldData(obj, outputFieldRefs.get(c)); + final ObjectInspector fieldOI = outputFieldRefs.get(c).getFieldObjectInspector(); + + // The data must be of type String + final StringObjectInspector fieldStringOI = (StringObjectInspector) fieldOI; + + // Convert the field to Java class String, because objects of String type + // can be stored in String, Text, or some other classes. + outputFields[c] = fieldStringOI.getPrimitiveJavaObject(field); + } + + final StringWriter writer = new StringWriter(); + final CSVWriter csv = newWriter(writer, separatorChar, quoteChar, escapeChar); + + try { + csv.writeNext(outputFields); + csv.close(); + + return new Text(writer.toString()); + } catch (final IOException ioe) { + throw new SerDeException(ioe); + } + } + + @Override + public Object deserialize(final Writable blob) throws SerDeException { + Text rowText = (Text) blob; + + CSVReader csv = null; + try { + csv = newReader(new CharArrayReader(rowText.toString().toCharArray()), separatorChar, + quoteChar, escapeChar); + final String[] read = csv.readNext(); + + for (int i = 0; i < numCols; i++) { + if (read != null && i < read.length) { + row.set(i, read[i]); + } else { + row.set(i, null); + } + } + + return row; + } catch (final Exception e) { + throw new SerDeException(e); + } finally { + if (csv != null) { + try { + csv.close(); + } catch (final Exception e) { + LOG.error("fail to close csv writer ", e); + } + } + } + } + + private CSVReader newReader(final Reader reader, char separator, char quote, char escape) { + // CSVReader will throw an exception if any of separator, quote, or escape is the same, but + // the CSV format specifies that the escape character and quote char are the same... very weird + if (CSVWriter.DEFAULT_ESCAPE_CHARACTER == escape) { + return new CSVReader(reader, separator, quote); + } else { + return new CSVReader(reader, separator, quote, escape); + } + } + + private CSVWriter newWriter(final Writer writer, char separator, char quote, char escape) { + if (CSVWriter.DEFAULT_ESCAPE_CHARACTER == escape) { + return new CSVWriter(writer, separator, quote, ""); + } else { + return new CSVWriter(writer, separator, quote, escape, ""); + } + } + + @Override + public ObjectInspector getObjectInspector() throws SerDeException { + return inspector; + } + + @Override + public Class getSerializedClass() { + return Text.class; + } + + @Override + public SerDeStats getSerDeStats() { + return null; + } +} diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveChar.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveChar.java index ef469eb..3799c7c 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveChar.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveChar.java @@ -55,14 +55,24 @@ public void setValue(LazyHiveChar copy) { @Override public void init(ByteArrayRef bytes, int start, int length) { - String byteData = null; - try { - byteData = Text.decode(bytes.getData(), start, length); - data.set(byteData, maxLength); + if (oi.isEscaped()) { + Text textData = data.getTextValue(); + // This is doing a lot of copying here, this could be improved by enforcing length + // at the same time as escaping rather than as separate steps. 
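
The new OpenCSVSerde above is easy to exercise directly. A small usage sketch; the column names, types, and sample line are made up for illustration, and the printed outputs are approximate:

import java.util.List;
import java.util.Properties;

import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.OpenCSVSerde;
import org.apache.hadoop.io.Text;

public class OpenCSVSerdeRoundTrip {
  public static void main(String[] args) throws Exception {
    OpenCSVSerde serde = new OpenCSVSerde();

    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, "a,b,c");
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string,string");
    props.setProperty(OpenCSVSerde.QUOTECHAR, "'");   // override just the quote character
    serde.initialize(null, props);

    // Deserialize one CSV line; a quoted separator stays inside the field.
    List<?> row = (List<?>) serde.deserialize(new Text("1,'two, with comma',3"));
    System.out.println(row);    // [1, two, with comma, 3]

    // Serialize the row back through the serde's own (all-string) ObjectInspector.
    Text line = (Text) serde.serialize(row, serde.getObjectInspector());
    System.out.println(line);   // fields re-quoted by CSVWriter, e.g. '1','two, with comma','3'
  }
}
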
+ LazyUtils.copyAndEscapeStringDataToText(bytes.getData(), start, length, + oi.getEscapeChar(),textData); + data.set(textData.toString(), maxLength); isNull = false; - } catch (CharacterCodingException e) { - isNull = true; - LOG.debug("Data not in the HiveChar data type range so converted to null.", e); + } else { + String byteData = null; + try { + byteData = Text.decode(bytes.getData(), start, length); + data.set(byteData, maxLength); + isNull = false; + } catch (CharacterCodingException e) { + isNull = true; + LOG.debug("Data not in the HiveChar data type range so converted to null.", e); + } } } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveVarchar.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveVarchar.java index bc8d41e..b4659e7 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveVarchar.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveVarchar.java @@ -55,14 +55,24 @@ public void setValue(LazyHiveVarchar copy) { @Override public void init(ByteArrayRef bytes, int start, int length) { - String byteData = null; - try { - byteData = Text.decode(bytes.getData(), start, length); - data.set(byteData, maxLength); + if (oi.isEscaped()) { + Text textData = data.getTextValue(); + // This is doing a lot of copying here, this could be improved by enforcing length + // at the same time as escaping rather than as separate steps. + LazyUtils.copyAndEscapeStringDataToText(bytes.getData(), start, length, + oi.getEscapeChar(),textData); + data.set(textData.toString(), maxLength); isNull = false; - } catch (CharacterCodingException e) { - isNull = true; - LOG.debug("Data not in the HiveVarchar data type range so converted to null.", e); + } else { + try { + String byteData = null; + byteData = Text.decode(bytes.getData(), start, length); + data.set(byteData, maxLength); + isNull = false; + } catch (CharacterCodingException e) { + isNull = true; + LOG.debug("Data not in the HiveVarchar data type range so converted to null.", e); + } } } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java index 28b3f86..75b9556 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java @@ -40,40 +40,7 @@ public void init(ByteArrayRef bytes, int start, int length) { if (oi.isEscaped()) { byte escapeChar = oi.getEscapeChar(); byte[] inputBytes = bytes.getData(); - - // First calculate the length of the output string - int outputLength = 0; - for (int i = 0; i < length; i++) { - if (inputBytes[start + i] != escapeChar) { - outputLength++; - } else { - outputLength++; - i++; - } - } - - // Copy the data over, so that the internal state of Text will be set to - // the required outputLength. - data.set(bytes.getData(), start, outputLength); - - // We need to copy the data byte by byte only in case the - // "outputLength < length" (which means there is at least one escaped - // byte. 
- if (outputLength < length) { - int k = 0; - byte[] outputBytes = data.getBytes(); - for (int i = 0; i < length; i++) { - byte b = inputBytes[start + i]; - if (b != escapeChar || i == length - 1) { - outputBytes[k++] = b; - } else { - // get the next byte - i++; - outputBytes[k++] = inputBytes[start + i]; - } - } - assert (k == outputLength); - } + LazyUtils.copyAndEscapeStringDataToText(inputBytes, start, length, escapeChar, data); } else { // if the data is not escaped, simply copy the data. data.set(bytes.getData(), start, length); diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java index 1d62422..3943508 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java @@ -412,6 +412,44 @@ static byte getSeparator(byte[] separators, int level) throws SerDeException { } } + public static void copyAndEscapeStringDataToText(byte[] inputBytes, int start, int length, + byte escapeChar, Text data) { + + // First calculate the length of the output string + int outputLength = 0; + for (int i = 0; i < length; i++) { + if (inputBytes[start + i] != escapeChar) { + outputLength++; + } else { + outputLength++; + i++; + } + } + + // Copy the data over, so that the internal state of Text will be set to + // the required outputLength. + data.set(inputBytes, start, outputLength); + + // We need to copy the data byte by byte only in case the + // "outputLength < length" (which means there is at least one escaped + // byte. + if (outputLength < length) { + int k = 0; + byte[] outputBytes = data.getBytes(); + for (int i = 0; i < length; i++) { + byte b = inputBytes[start + i]; + if (b != escapeChar || i == length - 1) { + outputBytes[k++] = b; + } else { + // get the next byte + i++; + outputBytes[k++] = inputBytes[start + i]; + } + } + assert (k == outputLength); + } + } + private LazyUtils() { // prevent instantiation } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveCharObjectInspector.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveCharObjectInspector.java index 65fb1ab..2b0ad15 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveCharObjectInspector.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveCharObjectInspector.java @@ -29,12 +29,21 @@ extends AbstractPrimitiveLazyObjectInspector implements HiveCharObjectInspector { + private boolean escaped; + private byte escapeChar; + // no-arg ctor required for Kyro public LazyHiveCharObjectInspector() { } public LazyHiveCharObjectInspector(CharTypeInfo typeInfo) { + this(typeInfo, false, (byte)0); + } + + public LazyHiveCharObjectInspector(CharTypeInfo typeInfo, boolean escaped, byte escapeChar) { super(typeInfo); + this.escaped = escaped; + this.escapeChar = escapeChar; } @Override @@ -63,6 +72,14 @@ public HiveChar getPrimitiveJavaObject(Object o) { return ret; } + public boolean isEscaped() { + return escaped; + } + + public byte getEscapeChar() { + return escapeChar; + } + @Override public String toString() { return getTypeName(); diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveVarcharObjectInspector.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveVarcharObjectInspector.java index c802ed0..8e7acce 100644 --- 
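
The unescaping loop that LazyString used to carry inline now lives in LazyUtils.copyAndEscapeStringDataToText, so LazyHiveChar and LazyHiveVarchar reuse exactly the same logic. A small driver showing what the shared method produces; the class name and sample bytes are illustrative only:

import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
import org.apache.hadoop.io.Text;

public class EscapeCopySketch {
  public static void main(String[] args) {
    // Raw bytes as they appear in the file: the '\' escapes the ',' so the comma
    // is data rather than a field delimiter.
    byte[] raw = "a\\,b".getBytes();
    Text out = new Text();
    LazyUtils.copyAndEscapeStringDataToText(raw, 0, raw.length, (byte) '\\', out);
    System.out.println(out); // a,b  (escape byte dropped, escaped byte kept)
  }
}
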
a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveVarcharObjectInspector.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveVarcharObjectInspector.java @@ -29,12 +29,21 @@ extends AbstractPrimitiveLazyObjectInspector implements HiveVarcharObjectInspector { + private boolean escaped; + private byte escapeChar; + // no-arg ctor required for Kyro public LazyHiveVarcharObjectInspector() { } public LazyHiveVarcharObjectInspector(VarcharTypeInfo typeInfo) { + this(typeInfo, false, (byte)0); + } + + public LazyHiveVarcharObjectInspector(VarcharTypeInfo typeInfo, boolean escaped, byte escapeChar) { super(typeInfo); + this.escaped = escaped; + this.escapeChar = escapeChar; } @Override @@ -63,6 +72,14 @@ public HiveVarchar getPrimitiveJavaObject(Object o) { return ret; } + public boolean isEscaped() { + return escaped; + } + + public byte getEscapeChar() { + return escapeChar; + } + @Override public String toString() { return getTypeName(); diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java index 734b9d8..08fec77 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java @@ -78,8 +78,10 @@ private LazyPrimitiveObjectInspectorFactory() { // prevent instantiation } - private static HashMap, LazyStringObjectInspector> cachedLazyStringObjectInspector = - new HashMap, LazyStringObjectInspector>(); + // Lazy object inspectors for string/char/varchar will all be cached in the same map. 
+ // Map key will be list of [typeInfo, isEscaped, escapeChar] + private static HashMap, AbstractPrimitiveLazyObjectInspector> cachedLazyStringTypeOIs = + new HashMap, AbstractPrimitiveLazyObjectInspector>(); private static Map> cachedPrimitiveLazyObjectInspectors = @@ -121,6 +123,10 @@ private LazyPrimitiveObjectInspectorFactory() { switch(primitiveCategory) { case STRING: return getLazyStringObjectInspector(escaped, escapeChar); + case CHAR: + return getLazyHiveCharObjectInspector((CharTypeInfo)typeInfo, escaped, escapeChar); + case VARCHAR: + return getLazyHiveVarcharObjectInspector((VarcharTypeInfo)typeInfo, escaped, escapeChar); case BOOLEAN: return getLazyBooleanObjectInspector(extBoolean); default: @@ -157,13 +163,44 @@ private LazyPrimitiveObjectInspectorFactory() { public static LazyStringObjectInspector getLazyStringObjectInspector(boolean escaped, byte escapeChar) { ArrayList signature = new ArrayList(); + signature.add(TypeInfoFactory.stringTypeInfo); signature.add(Boolean.valueOf(escaped)); signature.add(Byte.valueOf(escapeChar)); - LazyStringObjectInspector result = cachedLazyStringObjectInspector + LazyStringObjectInspector result = (LazyStringObjectInspector) cachedLazyStringTypeOIs .get(signature); if (result == null) { result = new LazyStringObjectInspector(escaped, escapeChar); - cachedLazyStringObjectInspector.put(signature, result); + cachedLazyStringTypeOIs.put(signature, result); + } + return result; + } + + public static LazyHiveCharObjectInspector getLazyHiveCharObjectInspector( + CharTypeInfo typeInfo, boolean escaped, byte escapeChar) { + ArrayList signature = new ArrayList(); + signature.add(typeInfo); + signature.add(Boolean.valueOf(escaped)); + signature.add(Byte.valueOf(escapeChar)); + LazyHiveCharObjectInspector result = (LazyHiveCharObjectInspector) cachedLazyStringTypeOIs + .get(signature); + if (result == null) { + result = new LazyHiveCharObjectInspector(typeInfo, escaped, escapeChar); + cachedLazyStringTypeOIs.put(signature, result); + } + return result; + } + + public static LazyHiveVarcharObjectInspector getLazyHiveVarcharObjectInspector( + VarcharTypeInfo typeInfo, boolean escaped, byte escapeChar) { + ArrayList signature = new ArrayList(); + signature.add(typeInfo); + signature.add(Boolean.valueOf(escaped)); + signature.add(Byte.valueOf(escapeChar)); + LazyHiveVarcharObjectInspector result = (LazyHiveVarcharObjectInspector) cachedLazyStringTypeOIs + .get(signature); + if (result == null) { + result = new LazyHiveVarcharObjectInspector(typeInfo, escaped, escapeChar); + cachedLazyStringTypeOIs.put(signature, result); } return result; } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java index c340dcb..d16e313 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java @@ -91,6 +91,9 @@ public Object copyObject(Object o) { @Override public Object set(Object o, HiveChar value) { + if (value == null) { + return null; + } HiveCharWritable writable = (HiveCharWritable) o; writable.set(value, getMaxLength()); return o; @@ -98,6 +101,9 @@ public Object set(Object o, HiveChar value) { @Override public Object set(Object o, String value) { + if (value == null) { + return null; + } HiveCharWritable writable = 
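
The factory above now caches string, char, and varchar inspectors in a single map keyed by [typeInfo, isEscaped, escapeChar], so repeated lookups with the same parameters return one shared instance. A quick check of that behavior as a sketch; varchar(20) and the backslash escape are arbitrary choices:

import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveVarcharObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;

public class OICacheSketch {
  public static void main(String[] args) {
    VarcharTypeInfo varchar20 = TypeInfoFactory.getVarcharTypeInfo(20);

    LazyHiveVarcharObjectInspector a = LazyPrimitiveObjectInspectorFactory
        .getLazyHiveVarcharObjectInspector(varchar20, true, (byte) '\\');
    LazyHiveVarcharObjectInspector b = LazyPrimitiveObjectInspectorFactory
        .getLazyHiveVarcharObjectInspector(varchar20, true, (byte) '\\');

    // Same [typeInfo, escaped, escapeChar] signature -> same cached inspector instance.
    System.out.println(a == b);                   // true
    System.out.println(a.isEscaped());            // true
    System.out.println((char) a.getEscapeChar()); // \
  }
}
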
(HiveCharWritable) o; writable.set(value, getMaxLength()); return o; diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java index 7bbfd1f..28c9080 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java @@ -96,6 +96,9 @@ public Object copyObject(Object o) { @Override public Object set(Object o, HiveVarchar value) { + if (value == null) { + return null; + } HiveVarcharWritable writable = (HiveVarcharWritable)o; writable.set(value, getMaxLength()); return o; @@ -103,6 +106,9 @@ public Object set(Object o, HiveVarchar value) { @Override public Object set(Object o, String value) { + if (value == null) { + return null; + } HiveVarcharWritable writable = (HiveVarcharWritable)o; writable.set(value, getMaxLength()); return o; diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/TestOpenCSVSerde.java b/serde/src/test/org/apache/hadoop/hive/serde2/TestOpenCSVSerde.java new file mode 100644 index 0000000..99fa553 --- /dev/null +++ b/serde/src/test/org/apache/hadoop/hive/serde2/TestOpenCSVSerde.java @@ -0,0 +1,82 @@ +package org.apache.hadoop.hive.serde2; + +import java.util.List; +import java.util.Properties; + +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.io.Text; +import org.junit.Before; +import org.junit.Test; +import static org.junit.Assert.assertEquals; + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
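
The null guards added to the Writable char/varchar inspectors above mean set() now reports a NULL field instead of dereferencing a null value. A sketch of the resulting behavior, assuming the public VarcharTypeInfo constructor that the factory itself uses; the class name is made up:

import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class NullVarcharSetSketch {
  public static void main(String[] args) {
    WritableHiveVarcharObjectInspector oi =
        new WritableHiveVarcharObjectInspector(TypeInfoFactory.getVarcharTypeInfo(10));

    HiveVarcharWritable writable = new HiveVarcharWritable();
    // Before this change a null value fell through to writable.set(null, maxLength);
    // now the inspector simply returns null, i.e. a NULL field.
    System.out.println(oi.set(writable, (String) null)); // null
    System.out.println(oi.set(writable, "abc"));          // abc
  }
}
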
+ */ + +public class TestOpenCSVSerde { + private final OpenCSVSerde csv = new OpenCSVSerde(); + private final Properties props = new Properties(); + + @Before + public void setup() throws Exception { + props.setProperty(serdeConstants.LIST_COLUMNS, "a,b,c"); + props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string,string"); + } + + @Test + public void testDeserialize() throws Exception { + csv.initialize(null, props); + final Text in = new Text("hello,\"yes, okay\",1"); + + final List row = (List) csv.deserialize(in); + + assertEquals("hello", row.get(0)); + assertEquals("yes, okay", row.get(1)); + assertEquals("1", row.get(2)); + } + + + @Test + public void testDeserializeCustomSeparators() throws Exception { + props.setProperty(OpenCSVSerde.SEPARATORCHAR, "\t"); + props.setProperty(OpenCSVSerde.QUOTECHAR, "'"); + + csv.initialize(null, props); + + final Text in = new Text("hello\t'yes\tokay'\t1"); + final List row = (List) csv.deserialize(in); + + assertEquals("hello", row.get(0)); + assertEquals("yes\tokay", row.get(1)); + assertEquals("1", row.get(2)); + } + + @Test + public void testDeserializeCustomEscape() throws Exception { + props.setProperty(OpenCSVSerde.QUOTECHAR, "'"); + props.setProperty(OpenCSVSerde.ESCAPECHAR, "\\"); + + csv.initialize(null, props); + + final Text in = new Text("hello,'yes\\'okay',1"); + final List row = (List) csv.deserialize(in); + + assertEquals("hello", row.get(0)); + assertEquals("yes'okay", row.get(1)); + assertEquals("1", row.get(2)); + } +} diff --git a/service/src/java/org/apache/hive/service/cli/CLIService.java b/service/src/java/org/apache/hive/service/cli/CLIService.java index a0bc905..b46c5b4 100644 --- a/service/src/java/org/apache/hive/service/cli/CLIService.java +++ b/service/src/java/org/apache/hive/service/cli/CLIService.java @@ -66,7 +66,7 @@ private UserGroupInformation httpUGI; public CLIService() { - super("CLIService"); + super(CLIService.class.getSimpleName()); } @Override @@ -201,8 +201,7 @@ public SessionHandle openSessionWithImpersonation(String username, String passwo * @see org.apache.hive.service.cli.ICLIService#closeSession(org.apache.hive.service.cli.SessionHandle) */ @Override - public void closeSession(SessionHandle sessionHandle) - throws HiveSQLException { + public void closeSession(SessionHandle sessionHandle) throws HiveSQLException { sessionManager.closeSession(sessionHandle); LOG.debug(sessionHandle + ": closeSession()"); } @@ -470,4 +469,8 @@ public void renewDelegationToken(SessionHandle sessionHandle, HiveAuthFactory au sessionManager.getSession(sessionHandle).renewDelegationToken(authFactory, tokenStr); LOG.info(sessionHandle + ": renewDelegationToken()"); } + + public SessionManager getSessionManager() { + return sessionManager; + } } diff --git a/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java b/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java index f5a8f27..a57b6e5 100644 --- a/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java +++ b/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java @@ -47,7 +47,7 @@ new HashMap(); public OperationManager() { - super("OperationManager"); + super(OperationManager.class.getSimpleName()); } @Override diff --git a/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java b/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java index b0bb8be..5231d5e 100644 --- 
a/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java +++ b/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java @@ -166,8 +166,8 @@ private void processGlobalInitFile() { IHiveFileProcessor processor = new GlobalHivercFileProcessor(); try { - if (hiveConf.getVar(ConfVars.HIVE_GLOBAL_INIT_FILE_LOCATION) != null) { - String hiverc = hiveConf.getVar(ConfVars.HIVE_GLOBAL_INIT_FILE_LOCATION) + if (hiveConf.getVar(ConfVars.HIVE_SERVER2_GLOBAL_INIT_FILE_LOCATION) != null) { + String hiverc = hiveConf.getVar(ConfVars.HIVE_SERVER2_GLOBAL_INIT_FILE_LOCATION) + File.separator + SessionManager.HIVERCFILE; if (new File(hiverc).exists()) { LOG.info("Running global init file: " + hiverc); diff --git a/service/src/java/org/apache/hive/service/cli/session/SessionManager.java b/service/src/java/org/apache/hive/service/cli/session/SessionManager.java index 11d25cc..4654acc 100644 --- a/service/src/java/org/apache/hive/service/cli/session/SessionManager.java +++ b/service/src/java/org/apache/hive/service/cli/session/SessionManager.java @@ -67,7 +67,7 @@ private volatile boolean shutdown; public SessionManager() { - super("SessionManager"); + super(SessionManager.class.getSimpleName()); } @Override @@ -356,5 +356,9 @@ private void executeSessionHooks(HiveSession session) throws Exception { return backgroundOperationPool.submit(r); } + public int getOpenSessionCount() { + return handleToSession.size(); + } + } diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java index 2b80adc..028d55e 100644 --- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java +++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java @@ -18,7 +18,6 @@ package org.apache.hive.service.cli.thrift; -import java.net.InetSocketAddress; import java.util.concurrent.ExecutorService; import java.util.concurrent.SynchronousQueue; import java.util.concurrent.ThreadPoolExecutor; @@ -40,72 +39,54 @@ public class ThriftBinaryCLIService extends ThriftCLIService { public ThriftBinaryCLIService(CLIService cliService) { - super(cliService, "ThriftBinaryCLIService"); + super(cliService, ThriftBinaryCLIService.class.getSimpleName()); } @Override public void run() { try { - hiveAuthFactory = new HiveAuthFactory(hiveConf); - TTransportFactory transportFactory = hiveAuthFactory.getAuthTransFactory(); - TProcessorFactory processorFactory = hiveAuthFactory.getAuthProcFactory(this); - - String portString = System.getenv("HIVE_SERVER2_THRIFT_PORT"); - if (portString != null) { - portNum = Integer.valueOf(portString); - } else { - portNum = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_THRIFT_PORT); - } - - String hiveHost = System.getenv("HIVE_SERVER2_THRIFT_BIND_HOST"); - if (hiveHost == null) { - hiveHost = hiveConf.getVar(ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST); - } - - if (hiveHost != null && !hiveHost.isEmpty()) { - serverAddress = new InetSocketAddress(hiveHost, portNum); - } else { - serverAddress = new InetSocketAddress(portNum); - } - - minWorkerThreads = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_THRIFT_MIN_WORKER_THREADS); - maxWorkerThreads = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_THRIFT_MAX_WORKER_THREADS); - workerKeepAliveTime = hiveConf.getTimeVar( - ConfVars.HIVE_SERVER2_THRIFT_WORKER_KEEPALIVE_TIME, TimeUnit.SECONDS); + // Server thread pool String threadPoolName = "HiveServer2-Handler-Pool"; ExecutorService executorService = new 
ThreadPoolExecutor(minWorkerThreads, maxWorkerThreads, workerKeepAliveTime, TimeUnit.SECONDS, new SynchronousQueue(), new ThreadFactoryWithGarbageCleanup(threadPoolName)); + // Thrift configs + hiveAuthFactory = new HiveAuthFactory(hiveConf); + TTransportFactory transportFactory = hiveAuthFactory.getAuthTransFactory(); + TProcessorFactory processorFactory = hiveAuthFactory.getAuthProcFactory(this); TServerSocket serverSocket = null; if (!hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_USE_SSL)) { serverSocket = HiveAuthFactory.getServerSocket(hiveHost, portNum); } else { String keyStorePath = hiveConf.getVar(ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PATH).trim(); if (keyStorePath.isEmpty()) { - throw new IllegalArgumentException(ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PATH.varname + - " Not configured for SSL connection"); + throw new IllegalArgumentException(ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PATH.varname + + " Not configured for SSL connection"); } String keyStorePassword = ShimLoader.getHadoopShims().getPassword(hiveConf, HiveConf.ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PASSWORD.varname); - serverSocket = HiveAuthFactory.getServerSSLSocket(hiveHost, portNum, - keyStorePath, keyStorePassword); + serverSocket = HiveAuthFactory.getServerSSLSocket(hiveHost, portNum, keyStorePath, + keyStorePassword); } + + // Server args TThreadPoolServer.Args sargs = new TThreadPoolServer.Args(serverSocket) - .processorFactory(processorFactory) - .transportFactory(transportFactory) - .protocolFactory(new TBinaryProtocol.Factory()) - .executorService(executorService); + .processorFactory(processorFactory).transportFactory(transportFactory) + .protocolFactory(new TBinaryProtocol.Factory()).executorService(executorService); + // TCP Server server = new TThreadPoolServer(sargs); - - LOG.info("ThriftBinaryCLIService listening on " + serverAddress); - server.serve(); - + String msg = "Started " + ThriftBinaryCLIService.class.getSimpleName() + " on port " + + portNum + " with " + minWorkerThreads + "..." 
+ maxWorkerThreads + " worker threads"; + LOG.info(msg); } catch (Throwable t) { - LOG.error("Error: ", t); + LOG.fatal( + "Error starting HiveServer2: could not start " + + ThriftBinaryCLIService.class.getSimpleName(), t); + System.exit(-1); } - } + } diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java index 443c371..c4b273c 100644 --- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java +++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java @@ -22,6 +22,7 @@ import java.net.InetSocketAddress; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.TimeUnit; import javax.security.auth.login.LoginException; @@ -34,6 +35,7 @@ import org.apache.hive.service.auth.TSetIpAddressProcessor; import org.apache.hive.service.cli.*; import org.apache.hive.service.cli.session.SessionManager; +import org.apache.hive.service.server.HiveServer2; import org.apache.thrift.TException; import org.apache.thrift.server.TServer; @@ -48,9 +50,11 @@ protected CLIService cliService; private static final TStatus OK_STATUS = new TStatus(TStatusCode.SUCCESS_STATUS); private static final TStatus ERROR_STATUS = new TStatus(TStatusCode.ERROR_STATUS); + protected static HiveAuthFactory hiveAuthFactory; protected int portNum; protected InetSocketAddress serverAddress; + protected String hiveHost; protected TServer server; protected org.eclipse.jetty.server.Server httpServer; @@ -62,8 +66,7 @@ protected int minWorkerThreads; protected int maxWorkerThreads; protected long workerKeepAliveTime; - - protected static HiveAuthFactory hiveAuthFactory; + private HiveServer2 hiveServer2; public ThriftCLIService(CLIService cliService, String serviceName) { super(serviceName); @@ -73,6 +76,43 @@ public ThriftCLIService(CLIService cliService, String serviceName) { @Override public synchronized void init(HiveConf hiveConf) { this.hiveConf = hiveConf; + + // Initialize common server configs needed in both binary & http modes + String portString; + hiveHost = System.getenv("HIVE_SERVER2_THRIFT_BIND_HOST"); + if (hiveHost == null) { + hiveHost = hiveConf.getVar(ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST); + } + // HTTP mode + if (HiveServer2.isHTTPTransportMode(hiveConf)) { + workerKeepAliveTime = + hiveConf.getTimeVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_WORKER_KEEPALIVE_TIME, + TimeUnit.SECONDS); + portString = System.getenv("HIVE_SERVER2_THRIFT_HTTP_PORT"); + if (portString != null) { + portNum = Integer.valueOf(portString); + } else { + portNum = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_PORT); + } + } + // Binary mode + else { + workerKeepAliveTime = + hiveConf.getTimeVar(ConfVars.HIVE_SERVER2_THRIFT_WORKER_KEEPALIVE_TIME, TimeUnit.SECONDS); + portString = System.getenv("HIVE_SERVER2_THRIFT_PORT"); + if (portString != null) { + portNum = Integer.valueOf(portString); + } else { + portNum = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_THRIFT_PORT); + } + } + if (hiveHost != null && !hiveHost.isEmpty()) { + serverAddress = new InetSocketAddress(hiveHost, portNum); + } else { + serverAddress = new InetSocketAddress(portNum); + } + minWorkerThreads = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_THRIFT_MIN_WORKER_THREADS); + maxWorkerThreads = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_THRIFT_MAX_WORKER_THREADS); super.init(hiveConf); } @@ -105,6 +145,14 @@ public synchronized void stop() { super.stop(); } + public int getPortNumber() { + return portNum; + } + + public 
InetSocketAddress getServerAddress() { + return serverAddress; + } + @Override public TGetDelegationTokenResp GetDelegationToken(TGetDelegationTokenReq req) throws TException { @@ -308,6 +356,24 @@ public TCloseSessionResp CloseSession(TCloseSessionReq req) throws TException { } catch (Exception e) { LOG.warn("Error closing session: ", e); resp.setStatus(HiveSQLException.toTStatus(e)); + } finally { + if (!(isEmbedded) && (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_SUPPORT_DYNAMIC_SERVICE_DISCOVERY)) + && (!hiveServer2.isRegisteredWithZooKeeper())) { + // Asynchronously shutdown this instance of HiveServer2, + // if there are no active client sessions + if (cliService.getSessionManager().getOpenSessionCount() == 0) { + LOG.info("This instance of HiveServer2 has been removed from the list of server " + + "instances available for dynamic service discovery. " + + "The last client session has ended - will shutdown now."); + Thread shutdownThread = new Thread() { + @Override + public void run() { + hiveServer2.stop(); + } + }; + shutdownThread.start(); + } + } } return resp; } @@ -591,5 +657,9 @@ private boolean isKerberosAuthMode() { .equalsIgnoreCase(HiveAuthFactory.AuthTypes.KERBEROS.toString()); } + public void setHiveServer2(HiveServer2 hiveServer2) { + this.hiveServer2 = hiveServer2; + } + } diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java index 4067106..795115e 100644 --- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java +++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java @@ -48,100 +48,94 @@ public class ThriftHttpCLIService extends ThriftCLIService { public ThriftHttpCLIService(CLIService cliService) { - super(cliService, "ThriftHttpCLIService"); + super(cliService, ThriftHttpCLIService.class.getSimpleName()); } + /** + * Configure Jetty to serve http requests. Example of a client connection URL: + * http://localhost:10000/servlets/thrifths2/ A gateway may cause actual target URL to differ, + * e.g. http://gateway:port/hive2/servlets/thrifths2/ + */ @Override public void run() { try { - // Configure Jetty to serve http requests - // Example of a client connection URL: http://localhost:10000/servlets/thrifths2/ - // a gateway may cause actual target URL to differ, e.g. 
http://gateway:port/hive2/servlets/thrifths2/ - + // Verify config validity verifyHttpConfiguration(hiveConf); - String portString = System.getenv("HIVE_SERVER2_THRIFT_HTTP_PORT"); - if (portString != null) { - portNum = Integer.valueOf(portString); - } else { - portNum = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_PORT); - } - - minWorkerThreads = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_MIN_WORKER_THREADS); - maxWorkerThreads = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_MAX_WORKER_THREADS); - workerKeepAliveTime = hiveConf.getTimeVar( - ConfVars.HIVE_SERVER2_THRIFT_HTTP_WORKER_KEEPALIVE_TIME, TimeUnit.SECONDS); - - String httpPath = getHttpPath(hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_HTTP_PATH)); - + // HTTP Server httpServer = new org.eclipse.jetty.server.Server(); + + // Server thread pool String threadPoolName = "HiveServer2-HttpHandler-Pool"; ExecutorService executorService = new ThreadPoolExecutor(minWorkerThreads, maxWorkerThreads, workerKeepAliveTime, TimeUnit.SECONDS, new LinkedBlockingQueue(), new ThreadFactoryWithGarbageCleanup(threadPoolName)); - ExecutorThreadPool threadPool = new ExecutorThreadPool(executorService); httpServer.setThreadPool(threadPool); - SelectChannelConnector connector = new SelectChannelConnector();; + // Connector configs + SelectChannelConnector connector = new SelectChannelConnector(); boolean useSsl = hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_USE_SSL); String schemeName = useSsl ? "https" : "http"; - String authType = hiveConf.getVar(ConfVars.HIVE_SERVER2_AUTHENTICATION); - // Set during the init phase of HiveServer2 if auth mode is kerberos - // UGI for the hive/_HOST (kerberos) principal - UserGroupInformation serviceUGI = cliService.getServiceUGI(); - // UGI for the http/_HOST (SPNego) principal - UserGroupInformation httpUGI = cliService.getHttpUGI(); - + // Change connector if SSL is used if (useSsl) { String keyStorePath = hiveConf.getVar(ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PATH).trim(); String keyStorePassword = ShimLoader.getHadoopShims().getPassword(hiveConf, HiveConf.ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PASSWORD.varname); if (keyStorePath.isEmpty()) { - throw new IllegalArgumentException(ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PATH.varname + - " Not configured for SSL connection"); + throw new IllegalArgumentException(ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PATH.varname + + " Not configured for SSL connection"); } SslContextFactory sslContextFactory = new SslContextFactory(); sslContextFactory.setKeyStorePath(keyStorePath); sslContextFactory.setKeyStorePassword(keyStorePassword); connector = new SslSelectChannelConnector(sslContextFactory); } - connector.setPort(portNum); // Linux:yes, Windows:no connector.setReuseAddress(!Shell.WINDOWS); - - int maxIdleTime = (int) hiveConf.getTimeVar( - ConfVars.HIVE_SERVER2_THRIFT_HTTP_MAX_IDLE_TIME, TimeUnit.MILLISECONDS); + int maxIdleTime = (int) hiveConf.getTimeVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_MAX_IDLE_TIME, + TimeUnit.MILLISECONDS); connector.setMaxIdleTime(maxIdleTime); - + httpServer.addConnector(connector); + // Thrift configs hiveAuthFactory = new HiveAuthFactory(hiveConf); TProcessorFactory processorFactory = hiveAuthFactory.getAuthProcFactory(this); TProcessor processor = processorFactory.getProcessor(null); - TProtocolFactory protocolFactory = new TBinaryProtocol.Factory(); + // Set during the init phase of HiveServer2 if auth mode is kerberos + // UGI for the hive/_HOST (kerberos) principal + UserGroupInformation serviceUGI = cliService.getServiceUGI(); + 
// UGI for the http/_HOST (SPNego) principal + UserGroupInformation httpUGI = cliService.getHttpUGI(); + String authType = hiveConf.getVar(ConfVars.HIVE_SERVER2_AUTHENTICATION); + TServlet thriftHttpServlet = new ThriftHttpServlet(processor, protocolFactory, authType, + serviceUGI, httpUGI); - TServlet thriftHttpServlet = new ThriftHttpServlet(processor, protocolFactory, - authType, serviceUGI, httpUGI); - + // Context handler final ServletContextHandler context = new ServletContextHandler( ServletContextHandler.SESSIONS); context.setContextPath("/"); - + String httpPath = getHttpPath(hiveConf + .getVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_HTTP_PATH)); httpServer.setHandler(context); context.addServlet(new ServletHolder(thriftHttpServlet), httpPath); // TODO: check defaults: maxTimeout, keepalive, maxBodySize, bodyRecieveDuration, etc. + // Finally, start the server httpServer.start(); - String msg = "Started ThriftHttpCLIService in " + schemeName + " mode on port " + portNum + - " path=" + httpPath + - " with " + minWorkerThreads + ".." + maxWorkerThreads + " worker threads"; + String msg = "Started " + ThriftHttpCLIService.class.getSimpleName() + " in " + schemeName + + " mode on port " + portNum + " path=" + httpPath + " with " + minWorkerThreads + "..." + + maxWorkerThreads + " worker threads"; LOG.info(msg); httpServer.join(); } catch (Throwable t) { - LOG.error("Error: ", t); + LOG.fatal( + "Error starting HiveServer2: could not start " + + ThriftHttpCLIService.class.getSimpleName(), t); + System.exit(-1); } } @@ -191,7 +185,8 @@ private static void verifyHttpConfiguration(HiveConf hiveConf) { // NONE in case of thrift mode uses SASL LOG.warn(ConfVars.HIVE_SERVER2_AUTHENTICATION + " setting to " + authType + ". SASL is not supported with http transport mode," + - " so using equivalent of " + AuthTypes.NOSASL); + " so using equivalent of " + + AuthTypes.NOSASL); } } diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java index b6b82b1..3d2356b 100644 --- a/service/src/java/org/apache/hive/service/server/HiveServer2.java +++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java @@ -18,6 +18,8 @@ package org.apache.hive.service.server; +import java.nio.charset.Charset; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.common.LogUtils; @@ -26,12 +28,21 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl; import org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolManager; +import org.apache.hadoop.hive.ql.util.ZooKeeperHiveHelper; import org.apache.hive.common.util.HiveStringUtils; +import org.apache.hive.common.util.HiveVersionInfo; import org.apache.hive.service.CompositeService; +import org.apache.hive.service.ServiceException; import org.apache.hive.service.cli.CLIService; import org.apache.hive.service.cli.thrift.ThriftBinaryCLIService; import org.apache.hive.service.cli.thrift.ThriftCLIService; import org.apache.hive.service.cli.thrift.ThriftHttpCLIService; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.ZooDefs.Ids; +import org.apache.zookeeper.ZooKeeper; /** * HiveServer2. 
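Note on the HiveServer2.java hunks that follow: when HIVE_SERVER2_SUPPORT_DYNAMIC_SERVICE_DISCOVERY is enabled, each server instance registers itself under the HIVE_SERVER2_ZOOKEEPER_NAMESPACE root as an ephemeral-sequential znode named server-<host:port>-<version>-<sequence>, with its host:port URI stored as UTF-8 znode data (see addServerInstanceToZooKeeper below), and it de-registers or defers shutdown until the last session closes once that znode is deleted. A client can therefore discover a live instance by listing the children of the namespace and reading any child's data. The sketch below illustrates that client side only; it is not part of this patch, and the ensemble string, namespace path, and class name are placeholder assumptions (the ZooKeeper getChildren/getData calls themselves are standard API).

import java.nio.charset.Charset;
import java.util.List;

import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;

// Illustrative client-side discovery; connection string and namespace are assumptions.
public class HiveServer2Discovery {
  public static void main(String[] args) throws Exception {
    String zkEnsemble = "zk1:2181,zk2:2181,zk3:2181"; // placeholder for hive.zookeeper.quorum
    String namespace = "/hiveserver2";                // placeholder for the HiveServer2 namespace znode

    ZooKeeper zk = new ZooKeeper(zkEnsemble, 60000, new Watcher() {
      @Override
      public void process(WatchedEvent event) {
        // No-op; a production client would track connection state here.
      }
    });
    try {
      // Every live HiveServer2 instance is an ephemeral-sequential child znode
      // whose data is its "host:port" URI.
      List<String> instances = zk.getChildren(namespace, false);
      if (instances.isEmpty()) {
        System.out.println("No HiveServer2 instances registered under " + namespace);
        return;
      }
      byte[] uri = zk.getData(namespace + "/" + instances.get(0), false, null);
      System.out.println("Connect to: " + new String(uri, Charset.forName("UTF-8")));
    } finally {
      zk.close();
    }
  }
}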
@@ -42,9 +53,12 @@ private CLIService cliService; private ThriftCLIService thriftCLIService; + private String znodePath; + private ZooKeeper zooKeeperClient; + private boolean registeredWithZooKeeper = false; public HiveServer2() { - super("HiveServer2"); + super(HiveServer2.class.getSimpleName()); HiveConf.setLoadHiveServer2Config(true); } @@ -53,20 +67,129 @@ public HiveServer2() { public synchronized void init(HiveConf hiveConf) { cliService = new CLIService(); addService(cliService); + if (isHTTPTransportMode(hiveConf)) { + thriftCLIService = new ThriftHttpCLIService(cliService); + } else { + thriftCLIService = new ThriftBinaryCLIService(cliService); + } + addService(thriftCLIService); + thriftCLIService.setHiveServer2(this); + super.init(hiveConf); + // Add a shutdown hook for catching SIGTERM & SIGINT + final HiveServer2 hiveServer2 = this; + Runtime.getRuntime().addShutdownHook(new Thread() { + @Override + public void run() { + hiveServer2.stop(); + } + }); + } + + public static boolean isHTTPTransportMode(HiveConf hiveConf) { String transportMode = System.getenv("HIVE_SERVER2_TRANSPORT_MODE"); - if(transportMode == null) { + if (transportMode == null) { transportMode = hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_TRANSPORT_MODE); } - if(transportMode != null && (transportMode.equalsIgnoreCase("http"))) { - thriftCLIService = new ThriftHttpCLIService(cliService); + if (transportMode != null && (transportMode.equalsIgnoreCase("http"))) { + return true; } - else { - thriftCLIService = new ThriftBinaryCLIService(cliService); + return false; + } + + /** + * Adds a server instance to ZooKeeper as a znode. + * + * @param hiveConf + * @throws Exception + */ + private void addServerInstanceToZooKeeper(HiveConf hiveConf) throws Exception { + int zooKeeperSessionTimeout = + hiveConf.getIntVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT); + String zooKeeperEnsemble = ZooKeeperHiveHelper.getQuorumServers(hiveConf); + String rootNamespace = hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_ZOOKEEPER_NAMESPACE); + String instanceURI = getServerInstanceURI(hiveConf); + byte[] znodeDataUTF8 = instanceURI.getBytes(Charset.forName("UTF-8")); + zooKeeperClient = + new ZooKeeper(zooKeeperEnsemble, zooKeeperSessionTimeout, + new ZooKeeperHiveHelper.DummyWatcher()); + + // Create the parent znodes recursively; ignore if the parent already exists + try { + ZooKeeperHiveHelper.createPathRecursively(zooKeeperClient, rootNamespace, + Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + LOG.info("Created the root name space: " + rootNamespace + " on ZooKeeper for HiveServer2"); + } catch (KeeperException e) { + if (e.code() != KeeperException.Code.NODEEXISTS) { + LOG.fatal("Unable to create HiveServer2 namespace: " + rootNamespace + " on ZooKeeper", e); + throw (e); + } } + // Create a znode under the rootNamespace parent for this instance of the server + // Znode name: server-host:port-versionInfo-sequence + try { + String znodePath = + ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + rootNamespace + + ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + "server-" + instanceURI + "-" + + HiveVersionInfo.getVersion() + "-"; + znodePath = + zooKeeperClient.create(znodePath, znodeDataUTF8, Ids.OPEN_ACL_UNSAFE, + CreateMode.EPHEMERAL_SEQUENTIAL); + setRegisteredWithZooKeeper(true); + // Set a watch on the znode + if (zooKeeperClient.exists(znodePath, new DeRegisterWatcher()) == null) { + // No node exists, throw exception + throw new Exception("Unable to create znode for this HiveServer2 instance on ZooKeeper."); + } + 
LOG.info("Created a znode on ZooKeeper for HiveServer2 uri: " + instanceURI); + } catch (KeeperException e) { + LOG.fatal("Unable to create a znode for this server instance", e); + throw new Exception(e); + } + } - addService(thriftCLIService); - super.init(hiveConf); + /** + * The watcher class which sets the de-register flag when the znode corresponding to this server + * instance is deleted. Additionally, it shuts down the server if there are no more active client + * sessions at the time of receiving a 'NodeDeleted' notification from ZooKeeper. + */ + private class DeRegisterWatcher implements Watcher { + public void process(WatchedEvent event) { + if (event.getType().equals(Watcher.Event.EventType.NodeDeleted)) { + HiveServer2.this.setRegisteredWithZooKeeper(false); + // If there are no more active client sessions, stop the server + if (cliService.getSessionManager().getOpenSessionCount() == 0) { + LOG.warn("This instance of HiveServer2 has been removed from the list of server " + + "instances available for dynamic service discovery. " + + "The last client session has ended - will shutdown now."); + HiveServer2.this.stop(); + } + LOG.warn("This HiveServer2 instance is now de-registered from ZooKeeper. " + + "The server will be shut down after the last client sesssion completes."); + } + } + } + + private void removeServerInstanceFromZooKeeper() throws Exception { + setRegisteredWithZooKeeper(false); + zooKeeperClient.close(); + LOG.info("Server instance removed from ZooKeeper."); + } + + public boolean isRegisteredWithZooKeeper() { + return registeredWithZooKeeper; + } + + private void setRegisteredWithZooKeeper(boolean registeredWithZooKeeper) { + this.registeredWithZooKeeper = registeredWithZooKeeper; + } + + private String getServerInstanceURI(HiveConf hiveConf) throws Exception { + if ((thriftCLIService == null) || (thriftCLIService.getServerAddress() == null)) { + throw new Exception("Unable to get the server address; it hasn't been initialized yet."); + } + return thriftCLIService.getServerAddress().getHostName() + ":" + + thriftCLIService.getPortNumber(); } @Override @@ -76,16 +199,25 @@ public synchronized void start() { @Override public synchronized void stop() { - super.stop(); - // there should already be an instance of the session pool manager. - // if not, ignoring is fine while stopping the hive server. + LOG.info("Shutting down HiveServer2"); HiveConf hiveConf = this.getHiveConf(); + super.stop(); + // Remove this server instance from ZooKeeper if dynamic service discovery is set + if (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_SUPPORT_DYNAMIC_SERVICE_DISCOVERY)) { + try { + removeServerInstanceFromZooKeeper(); + } catch (Exception e) { + LOG.error("Error removing znode for this HiveServer2 instance from ZooKeeper.", e); + } + } + // There should already be an instance of the session pool manager. + // If not, ignoring is fine while stopping HiveServer2. if (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_TEZ_INITIALIZE_DEFAULT_SESSIONS)) { try { TezSessionPoolManager.getInstance().stop(); } catch (Exception e) { - LOG.error("Tez session pool manager stop had an error during stop of hive server"); - e.printStackTrace(); + LOG.error("Tez session pool manager stop had an error during stop of HiveServer2. 
" + + "Shutting down HiveServer2 anyway.", e); } } @@ -100,7 +232,7 @@ public synchronized void stop() { private static void startHiveServer2() throws Throwable { long attempts = 0, maxAttempts = 1; - while(true) { + while (true) { HiveConf hiveConf = new HiveConf(); maxAttempts = hiveConf.getLongVar(HiveConf.ConfVars.HIVE_SERVER2_MAX_START_ATTEMPTS); HiveServer2 server = null; @@ -108,6 +240,11 @@ private static void startHiveServer2() throws Throwable { server = new HiveServer2(); server.init(hiveConf); server.start(); + // If we're supporting dynamic service discovery, we'll add the service uri for this + // HiveServer2 instance to Zookeeper as a znode. + if (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_SUPPORT_DYNAMIC_SERVICE_DISCOVERY)) { + server.addServerInstanceToZooKeeper(hiveConf); + } if (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_TEZ_INITIALIZE_DEFAULT_SESSIONS)) { TezSessionPoolManager sessionPool = TezSessionPoolManager.getInstance(); sessionPool.setupPool(hiveConf); @@ -119,19 +256,19 @@ private static void startHiveServer2() throws Throwable { } break; } catch (Throwable throwable) { - if(++attempts >= maxAttempts) { + if (++attempts >= maxAttempts) { throw new Error("Max start attempts " + maxAttempts + " exhausted", throwable); } else { - LOG.warn("Error starting HiveServer2 on attempt " + attempts + - ", will retry in 60 seconds", throwable); + LOG.warn("Error starting HiveServer2 on attempt " + attempts + + ", will retry in 60 seconds", throwable); try { if (server != null) { server.stop(); server = null; } } catch (Exception e) { - LOG.info("Exception caught when calling stop of HiveServer2 before" + - " retrying start", e); + LOG.info( + "Exception caught when calling stop of HiveServer2 before" + " retrying start", e); } try { Thread.sleep(60L * 1000L); @@ -152,14 +289,15 @@ public static void main(String[] args) { System.exit(-1); } - //NOTE: It is critical to do this here so that log4j is reinitialized + // NOTE: It is critical to do this here so that log4j is reinitialized // before any of the other core hive classes are loaded String initLog4jMessage = LogUtils.initHiveLog4j(); LOG.debug(initLog4jMessage); HiveStringUtils.startupShutdownMessage(HiveServer2.class, args, LOG); - //log debug message from "oproc" after log4j initialize properly + // log debug message from "oproc" after log4j initialize properly LOG.debug(oproc.getDebugMessage().toString()); + startHiveServer2(); } catch (LogInitializationException e) { LOG.error("Error initializing log: " + e.getMessage(), e); @@ -169,6 +307,5 @@ public static void main(String[] args) { System.exit(-1); } } - } diff --git a/service/src/test/org/apache/hive/service/cli/session/TestSessionGlobalInitFile.java b/service/src/test/org/apache/hive/service/cli/session/TestSessionGlobalInitFile.java index 66fc1fc..5b1cbc0 100644 --- a/service/src/test/org/apache/hive/service/cli/session/TestSessionGlobalInitFile.java +++ b/service/src/test/org/apache/hive/service/cli/session/TestSessionGlobalInitFile.java @@ -27,7 +27,11 @@ import org.apache.commons.io.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hive.service.cli.*; +import org.apache.hive.service.cli.CLIService; +import org.apache.hive.service.cli.ICLIService; +import org.apache.hive.service.cli.OperationHandle; +import org.apache.hive.service.cli.RowSet; +import org.apache.hive.service.cli.SessionHandle; import org.apache.hive.service.cli.thrift.ThriftBinaryCLIService; import org.apache.hive.service.cli.thrift.ThriftCLIServiceClient; import 
org.junit.After; @@ -83,7 +87,7 @@ public void setUp() throws Exception { // set up service and client HiveConf hiveConf = new HiveConf(); - hiveConf.setVar(HiveConf.ConfVars.HIVE_GLOBAL_INIT_FILE_LOCATION, + hiveConf.setVar(HiveConf.ConfVars.HIVE_SERVER2_GLOBAL_INIT_FILE_LOCATION, initFile.getParentFile().getAbsolutePath()); service = new FakeEmbeddedThriftBinaryCLIService(hiveConf); service.init(new HiveConf()); diff --git a/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java b/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java index faae060..9592bc4 100644 --- a/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java +++ b/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java @@ -37,6 +37,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.TreeMap; import javax.security.auth.Subject; import javax.security.auth.login.LoginException; @@ -652,6 +653,17 @@ public UserGroupInformation createProxyUser(String userName) throws IOException } @Override + public TreeMap getLocationsWithOffset(FileSystem fs, + FileStatus status) throws IOException { + TreeMap offsetBlockMap = new TreeMap(); + BlockLocation[] locations = getLocations(fs, status); + for (BlockLocation location : locations) { + offsetBlockMap.put(location.getOffset(), location); + } + return offsetBlockMap; + } + + @Override public void hflush(FSDataOutputStream stream) throws IOException { stream.sync(); } diff --git a/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java b/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java index a809eb1..bbb3f4e 100644 --- a/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java +++ b/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java @@ -27,6 +27,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.TreeMap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.filecache.DistributedCache; @@ -403,6 +404,17 @@ public WebHCatJTShim getWebHCatShim(Configuration conf, UserGroupInformation ugi } @Override + public TreeMap getLocationsWithOffset(FileSystem fs, + FileStatus status) throws IOException { + TreeMap offsetBlockMap = new TreeMap(); + BlockLocation[] locations = getLocations(fs, status); + for (BlockLocation location : locations) { + offsetBlockMap.put(location.getOffset(), location); + } + return offsetBlockMap; + } + + @Override public void hflush(FSDataOutputStream stream) throws IOException { stream.sync(); } diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java index f8d9346..afeb073 100644 --- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java +++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java @@ -29,6 +29,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.TreeMap; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; @@ -511,6 +512,17 @@ public WebHCatJTShim getWebHCatShim(Configuration conf, UserGroupInformation ugi } @Override + public TreeMap getLocationsWithOffset(FileSystem fs, + FileStatus status) throws IOException { + TreeMap offsetBlockMap = new TreeMap(); + BlockLocation[] locations = getLocations(fs, status); + for (BlockLocation location : locations) { + 
offsetBlockMap.put(location.getOffset(), location); + } + return offsetBlockMap; + } + + @Override public void hflush(FSDataOutputStream stream) throws IOException { stream.hflush(); } diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java index 964c38d..282437c 100644 --- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java +++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java @@ -30,6 +30,7 @@ import java.util.Comparator; import java.util.List; import java.util.Map; +import java.util.TreeMap; import javax.security.auth.login.LoginException; @@ -477,6 +478,19 @@ RecordReader getRecordReader(JobConf job, InputSplitShim split, Reporter reporte FileStatus status) throws IOException; /** + * For the block locations returned by getLocations(), convert them into a TreeMap + * keyed by offset, by iterating over the list of block locations. + * Using a TreeMap from offset to BlockLocation makes it O(log n) to get a particular + * block based upon its offset. + * @param fs the file system + * @param status the file information + * @return TreeMap + * @throws IOException + */ + TreeMap getLocationsWithOffset(FileSystem fs, + FileStatus status) throws IOException; + + /** * Flush and make visible to other users the changes to the given stream. * @param stream the stream to hflush. * @throws IOException
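A usage note on the getLocationsWithOffset() shim added above: because the returned map is ordered by block start offset, a caller can find the block that contains an arbitrary file offset with TreeMap.floorEntry() in O(log n). A minimal sketch, assuming the map is parameterized as TreeMap<Long, BlockLocation> (the type parameters appear stripped in the diff text above); the file path, offset, and class name are placeholders, while ShimLoader.getHadoopShims() is the usual way callers obtain the shim.

import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.ShimLoader;

public class BlockOffsetLookup {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus status = fs.getFileStatus(new Path("/tmp/example.dat")); // placeholder file
    long offset = 128L * 1024 * 1024;                                   // placeholder offset

    HadoopShims shims = ShimLoader.getHadoopShims();
    TreeMap<Long, BlockLocation> blocks = shims.getLocationsWithOffset(fs, status);

    // floorEntry(offset) returns the entry with the greatest block start offset <= offset,
    // i.e. the block containing the offset when the offset lies within the file.
    Map.Entry<Long, BlockLocation> entry = blocks.floorEntry(offset);
    if (entry != null) {
      BlockLocation block = entry.getValue();
      System.out.println("Offset " + offset + " is in the block starting at " + block.getOffset()
          + ", hosts: " + Arrays.toString(block.getHosts()));
    }
  }
}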