diff --git a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java index 06c3f6c..27b8504 100644 --- a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java +++ b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java @@ -216,7 +216,7 @@ public int processCmd(String cmd) { } } else { // local mode try { - CommandProcessor proc = CommandProcessorFactory.get(tokens[0], (HiveConf) conf); + CommandProcessor proc = CommandProcessorFactory.get(tokens, (HiveConf) conf); ret = processLocalCmd(cmd, proc, ss); } catch (SQLException e) { console.printError("Failed processing command " + tokens[0] + " " + e.getLocalizedMessage(), @@ -579,8 +579,9 @@ public boolean isDelimiterChar (String buffer, int pos) { // We stack a custom Completor on top of our ArgumentCompletor // to reverse this. Completor completor = new Completor () { + @Override public int complete (String buffer, int offset, List completions) { - List comp = (List) completions; + List comp = completions; int ret = ac.complete(buffer, offset, completions); // ConsoleReader will do the substitution if and only if there // is exactly one valid completion, so we ignore other cases. diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 3f50361..ec1f09e 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -830,7 +830,7 @@ HIVE_SECURITY_COMMAND_WHITELIST("hive.security.command.whitelist", "set,reset,dfs,add,delete,compile"), - HIVE_CONF_RESTRICTED_LIST("hive.conf.restricted.list", ""), + HIVE_CONF_RESTRICTED_LIST("hive.conf.restricted.list", "hive.security.authenticator.manager,hive.security.authorization.manager"), // If this is set all move tasks at the end of a multi-insert query will only begin once all // outputs are ready @@ -880,7 +880,7 @@ HIVE_VECTORIZATION_GROUPBY_CHECKINTERVAL("hive.vectorized.groupby.checkinterval", 100000), HIVE_VECTORIZATION_GROUPBY_MAXENTRIES("hive.vectorized.groupby.maxentries", 1000000), HIVE_VECTORIZATION_GROUPBY_FLUSH_PERCENT("hive.vectorized.groupby.flush.percent", (float) 0.1), - + HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true), @@ -895,7 +895,7 @@ HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES("hive.explain.dependency.append.tasktype", false), HIVECOUNTERGROUP("hive.counters.group.name", "HIVE"), - + // none, column // none is the default(past) behavior. Implies only alphaNumeric and underscore are valid characters in identifiers. // column: implies column names can contain any character. 
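Note on the hive.conf.restricted.list default changed above: properties on this list are immutable at runtime, so with the new default a running session can no longer swap out the authorization or authentication manager. A minimal illustrative session, not part of this patch (the custom authorizer class name is a placeholder):

    SET hive.conf.restricted.list;
    -- shows hive.security.authenticator.manager,hive.security.authorization.manager
    SET hive.security.authorization.manager=org.example.CustomAuthorizerFactory;
    -- fails, because the property is on hive.conf.restricted.list and cannot be changed at runtime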
diff --git a/conf/hive-default.xml.template b/conf/hive-default.xml.template index 420d959..e1ce436 100644 --- a/conf/hive-default.xml.template +++ b/conf/hive-default.xml.template @@ -1656,7 +1656,7 @@ hive.conf.restricted.list - + hive.security.authenticator.manager,hive.security.authorization.manager Comma separated list of configuration options which are immutable at runtime diff --git a/data/conf/hive-site.xml b/data/conf/hive-site.xml index 3cdea2f..be54fda 100644 --- a/data/conf/hive-site.xml +++ b/data/conf/hive-site.xml @@ -197,4 +197,10 @@ 0 + + hive.conf.restricted.list + dummy.config.value + Using dummy config value above because you cannot override config with empty value + + diff --git a/data/files/parquet_create.txt b/data/files/parquet_create.txt new file mode 100644 index 0000000..ccd48ee --- /dev/null +++ b/data/files/parquet_create.txt @@ -0,0 +1,3 @@ +1|foo line1|key11:value11,key12:value12,key13:value13|a,b,c|one,two +2|bar line2|key21:value21,key22:value22,key23:value23|d,e,f|three,four +3|baz line3|key31:value31,key32:value32,key33:value33|g,h,i|five,six diff --git a/data/files/parquet_partitioned.txt b/data/files/parquet_partitioned.txt new file mode 100644 index 0000000..8f322f3 --- /dev/null +++ b/data/files/parquet_partitioned.txt @@ -0,0 +1,3 @@ +1|foo|part1 +2|bar|part2 +3|baz|part2 diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java index 8cd594b..5752303 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java @@ -28,6 +28,8 @@ import java.util.Properties; import java.util.Set; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HColumnDescriptor; @@ -66,6 +68,7 @@ public class HBaseStorageHandler extends DefaultStorageHandler implements HiveMetaHook, HiveStoragePredicateHandler { + final static private Log LOG = LogFactory.getLog(HBaseStorageHandler.class); final static public String DEFAULT_PREFIX = "default."; //Check if the configure job properties is called from input @@ -255,7 +258,11 @@ public void setConf(Configuration conf) { @Override public Class getOutputFormatClass() { - return org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat.class; + if (jobConf.getBoolean("hive.hbase.bulkload", false)) { + return HiveHFileOutputFormat.class; + } else { + return HiveHBaseTableOutputFormat.class; + } } @Override @@ -342,10 +349,24 @@ public void configureTableJobProperties( } //input job properties } else { - jobProperties.put(TableOutputFormat.OUTPUT_TABLE, tableName); + if (isHBaseBulkLoad(jobConf)) { + // only support bulkload when a hfile.family.path has been specified. + // TODO: support generating a temporary output path when hfile.family.path is not specified. 
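Note on the new data/files/parquet_create.txt fixture added above: rows are pipe-delimited, map entries use ':' between key and value, and collection items are comma-separated. A hedged sketch of a delimited staging table that could load it (the table name, column names, and the choice of a two-field struct for the last column are assumptions for illustration, not taken from the accompanying tests):

    CREATE TABLE parquet_create_staging_sketch (
      id INT,
      str STRING,
      mp MAP<STRING, STRING>,
      lst ARRAY<STRING>,
      strct STRUCT<a:STRING, b:STRING>
    )
    ROW FORMAT DELIMITED
      FIELDS TERMINATED BY '|'
      COLLECTION ITEMS TERMINATED BY ','
      MAP KEYS TERMINATED BY ':';

Such a staging table would typically be populated with LOAD DATA and then copied into a Parquet-stored table with INSERT ... SELECT.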
+ // TODO: support loading into multiple CF's at a time + String path = HiveHFileOutputFormat.getFamilyPath(jobConf, tableProperties); + // TODO: use a variation of FileOutputFormat.setOutputPath + LOG.debug("Setting mapred.output.dir to " + path); + jobProperties.put("mapred.output.dir", path); + } else { + jobProperties.put(TableOutputFormat.OUTPUT_TABLE, tableName); + } } // output job properties } + private static final boolean isHBaseBulkLoad(Configuration conf) { + return conf.getBoolean("hive.hbase.bulkload", false); + } + /** * Utility method to add hbase-default.xml and hbase-site.xml properties to a new map * if they are not already present in the jobConf. @@ -378,7 +399,7 @@ public void configureJobConf(TableDesc tableDesc, JobConf jobConf) { try { TableMapReduceUtil.addDependencyJars(jobConf); org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(jobConf, - HBaseStorageHandler.class, org.apache.hadoop.hbase.HBaseConfiguration.class); + HBaseStorageHandler.class, org.apache.hadoop.hbase.HBaseConfiguration.class); } catch (IOException e) { throw new RuntimeException(e); } diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHFileOutputFormat.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHFileOutputFormat.java index 6d383b5..16d9a9e 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHFileOutputFormat.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHFileOutputFormat.java @@ -19,6 +19,9 @@ package org.apache.hadoop.hive.hbase; import java.io.IOException; +import java.io.InterruptedIOException; +import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.Properties; import java.util.SortedMap; @@ -27,10 +30,14 @@ import org.apache.commons.lang.NotImplementedException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat; import org.apache.hadoop.hbase.util.Bytes; @@ -54,10 +61,9 @@ HFileOutputFormat implements HiveOutputFormat { - private static final String HFILE_FAMILY_PATH = "hfile.family.path"; + public static final String HFILE_FAMILY_PATH = "hfile.family.path"; - static final Log LOG = LogFactory.getLog( - HiveHFileOutputFormat.class.getName()); + static final Log LOG = LogFactory.getLog(HiveHFileOutputFormat.class.getName()); private org.apache.hadoop.mapreduce.RecordWriter @@ -70,6 +76,14 @@ } } + /** + * Retrieve the family path, first check the JobConf, then the table properties. + * @return the family path or null if not specified. 
+ */ + public static String getFamilyPath(Configuration jc, Properties tableProps) { + return jc.get(HFILE_FAMILY_PATH, tableProps.getProperty(HFILE_FAMILY_PATH)); + } + @Override public FSRecordWriter getHiveRecordWriter( final JobConf jc, @@ -79,8 +93,8 @@ public FSRecordWriter getHiveRecordWriter( Properties tableProperties, final Progressable progressable) throws IOException { - // Read configuration for the target path - String hfilePath = tableProperties.getProperty(HFILE_FAMILY_PATH); + // Read configuration for the target path, first from jobconf, then from table properties + String hfilePath = getFamilyPath(jc, tableProperties); if (hfilePath == null) { throw new RuntimeException( "Please set " + HFILE_FAMILY_PATH + " to target location for HFiles"); @@ -129,14 +143,20 @@ public void close(boolean abort) throws IOException { if (abort) { return; } - // Move the region file(s) from the task output directory - // to the location specified by the user. There should - // actually only be one (each reducer produces one HFile), - // but we don't know what its name is. + /* + * Move the region file(s) from the task output directory to the location specified by + * the user. There should actually only be one (each reducer produces one HFile), but + * we don't know what its name is. + * + * TODO: simplify bulkload to detecting the HBaseStorageHandler scenario, ignore + * hfile.family.path, skip this move step and allow MoveTask to operate directly off + * of SemanticAnalyzer's queryTempdir. + */ FileSystem fs = outputdir.getFileSystem(jc); fs.mkdirs(columnFamilyPath); Path srcDir = outputdir; for (;;) { + LOG.debug("Looking for column family names in " + srcDir); FileStatus [] files = fs.listStatus(srcDir); if ((files == null) || (files.length == 0)) { throw new IOException("No files found in " + srcDir); @@ -150,6 +170,7 @@ public void close(boolean abort) throws IOException { } } for (FileStatus regionFile : fs.listStatus(srcDir)) { + LOG.debug("Moving hfile " + regionFile.getPath() + " to new parent directory " + columnFamilyPath); fs.rename( regionFile.getPath(), new Path( @@ -165,10 +186,9 @@ public void close(boolean abort) throws IOException { } } - @Override - public void write(Writable w) throws IOException { + private void writeTest(Text text) throws IOException { // Decompose the incoming text row into fields. - String s = ((Text) w).toString(); + String s = text.toString(); String [] fields = s.split("\u0001"); assert(fields.length <= (columnMap.size() + 1)); // First field is the row key. 
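Note on getFamilyPath() above: the family path is now resolved from the job configuration first, falling back to the table properties, so either of the following supplies it, with the SET taking precedence when both are present. A hedged HiveQL sketch (the table name, paths, and the TextInputFormat pairing are assumptions for illustration):

    CREATE TABLE hfile_sketch (key STRING, val STRING)
    STORED AS
      INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
      OUTPUTFORMAT 'org.apache.hadoop.hive.hbase.HiveHFileOutputFormat'
    TBLPROPERTIES ('hfile.family.path' = '/tmp/hfile_sketch/cf');

    -- or, per session/job, overriding any table property:
    SET hfile.family.path=/tmp/hfile_sketch_override/cf;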
@@ -196,11 +216,40 @@ public void write(Writable w) throws IOException { valBytes); try { fileWriter.write(null, kv); + } catch (IOException e) { + LOG.info("Failed while writing row: " + s); + throw e; } catch (InterruptedException ex) { throw new IOException(ex); } } } + + private void writePut(PutWritable put) throws IOException { + ImmutableBytesWritable row = new ImmutableBytesWritable(put.getPut().getRow()); + SortedMap> cells = put.getPut().getFamilyCellMap(); + for (Map.Entry> entry : cells.entrySet()) { + Collections.sort(entry.getValue(), new CellComparator()); + for (Cell c : entry.getValue()) { + try { + fileWriter.write(row, KeyValueUtil.copyToNewKeyValue(c)); + } catch (InterruptedException e) { + throw (InterruptedIOException) new InterruptedIOException().initCause(e); + } + } + } + } + + @Override + public void write(Writable w) throws IOException { + if (w instanceof Text) { + writeTest((Text) w); + } else if (w instanceof PutWritable) { + writePut((PutWritable) w); + } else { + throw new IOException("Unexpected writable " + w); + } + } }; } diff --git a/hbase-handler/src/test/queries/positive/hbase_storage_handler_bulk.q b/hbase-handler/src/test/queries/positive/hbase_storage_handler_bulk.q new file mode 100644 index 0000000..f8354e5 --- /dev/null +++ b/hbase-handler/src/test/queries/positive/hbase_storage_handler_bulk.q @@ -0,0 +1,13 @@ +-- -*- mode:sql -*- + +DROP TABLE IF EXISTS hbase_bulk; + +CREATE TABLE hbase_bulk (key INT, value STRING) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ('hbase.columns.mapping' = ':key,cf:string'); + +INSERT OVERWRITE TABLE hbase_bulk SELECT * FROM src; + +SET hive.hbase.bulkload = true; +SET hfile.family.path = /tmp/bulk_hfiles/f; +EXPLAIN INSERT OVERWRITE TABLE hbase_bulk SELECT * FROM src CLUSTER BY key; diff --git a/hbase-handler/src/test/results/positive/hbase_storage_handler_bulk.q.out b/hbase-handler/src/test/results/positive/hbase_storage_handler_bulk.q.out new file mode 100644 index 0000000..860582a --- /dev/null +++ b/hbase-handler/src/test/results/positive/hbase_storage_handler_bulk.q.out @@ -0,0 +1,79 @@ +PREHOOK: query: -- -*- mode:sql -*- + +DROP TABLE IF EXISTS hbase_bulk +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- -*- mode:sql -*- + +DROP TABLE IF EXISTS hbase_bulk +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE hbase_bulk (key INT, value STRING) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ('hbase.columns.mapping' = ':key,cf:string') +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE hbase_bulk (key INT, value STRING) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ('hbase.columns.mapping' = ':key,cf:string') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@hbase_bulk +PREHOOK: query: INSERT OVERWRITE TABLE hbase_bulk SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@hbase_bulk +POSTHOOK: query: INSERT OVERWRITE TABLE hbase_bulk SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@hbase_bulk +PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE hbase_bulk SELECT * FROM src CLUSTER BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE hbase_bulk SELECT * FROM src CLUSTER BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map 
Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat + output format: org.apache.hadoop.hive.hbase.HiveHFileOutputFormat + serde: org.apache.hadoop.hive.hbase.HBaseSerDe + name: default.hbase_bulk + + Stage: Stage-0 + Move Operator + HBase completeBulkLoad: + table: + input format: org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat + output format: org.apache.hadoop.hive.hbase.HiveHFileOutputFormat + serde: org.apache.hadoop.hive.hbase.HBaseSerDe + name: default.hbase_bulk + + Stage: Stage-2 + Stats-Aggr Operator + diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/DummyAuthenticator.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/DummyAuthenticator.java index 578a177..a296ac5 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/DummyAuthenticator.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/DummyAuthenticator.java @@ -22,11 +22,12 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.session.SessionState; public class DummyAuthenticator implements HiveAuthenticationProvider { - private List groupNames; - private String userName; + private final List groupNames; + private final String userName; private Configuration conf; public DummyAuthenticator() { @@ -56,8 +57,14 @@ public void setConf(Configuration conf) { this.conf = conf; } + @Override public Configuration getConf() { return this.conf; } + @Override + public void setSessionState(SessionState ss) { + //no op + } + } diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/InjectableDummyAuthenticator.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/InjectableDummyAuthenticator.java index 2dd225e..351ef00 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/InjectableDummyAuthenticator.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/InjectableDummyAuthenticator.java @@ -22,6 +22,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.HiveMetaStore.HMSHandler; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.session.SessionState; /** * @@ -80,7 +81,7 @@ public Configuration getConf() { @Override public void setConf(Configuration config) { try { - hmap = (HiveMetastoreAuthenticationProvider) hmapClass.newInstance(); + hmap = hmapClass.newInstance(); } 
catch (InstantiationException e) { throw new RuntimeException("Whoops, could not create an Authenticator of class " + hmapClass.getName()); @@ -102,4 +103,9 @@ public void destroy() throws HiveException { hmap.destroy(); } + @Override + public void setSessionState(SessionState arg0) { + //no-op + } + } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java index 377709f..7af4368 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java @@ -20,7 +20,6 @@ import java.util.List; import java.util.Map; -import java.util.Set; import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; @@ -931,6 +930,8 @@ public boolean revoke_role(String role_name, String user_name, throws MetaException, TException; /** + * Return the privileges that the user, group have directly and indirectly through roles + * on the given hiveObject * @param hiveObject * @param user_name * @param group_names @@ -943,6 +944,7 @@ public PrincipalPrivilegeSet get_privilege_set(HiveObjectRef hiveObject, TException; /** + * Return the privileges that this principal has directly over the object (not through roles). * @param principal_name * @param principal_type * @param hiveObject diff --git a/pom.xml b/pom.xml index e652f08..e0e3634 100644 --- a/pom.xml +++ b/pom.xml @@ -128,6 +128,7 @@ requires netty < 3.6.0 we force hadoops version --> 3.4.0.Final + 1.3.2 0.12.0 2.5.0 1.0.1 @@ -223,6 +224,17 @@ ${bonecp.version} + com.twitter + parquet-hadoop-bundle + ${parquet.version} + + + com.twitter + parquet-column + ${parquet.version} + tests + + com.sun.jersey jersey-core ${jersey.version} diff --git a/ql/pom.xml b/ql/pom.xml index 7087a4c..8003e47 100644 --- a/ql/pom.xml +++ b/ql/pom.xml @@ -67,6 +67,10 @@ ${kryo.version} + com.twitter + parquet-hadoop-bundle + + commons-codec commons-codec ${commons-codec.version} @@ -204,6 +208,12 @@ + com.twitter + parquet-column + tests + test + + junit junit ${junit.version} @@ -321,7 +331,17 @@ org.apache.hadoop hadoop-core ${hadoop-20S.version} - true + true + + + org.apache.hbase + hbase-client + ${hbase.hadoop1.version} + + + org.apache.hbase + hbase-server + ${hbase.hadoop1.version} @@ -363,6 +383,16 @@ ${hadoop-23.version} true + + org.apache.hbase + hbase-client + ${hbase.hadoop2.version} + + + org.apache.hbase + hbase-server + ${hbase.hadoop2.version} + @@ -476,6 +506,7 @@ org.apache.hive:hive-exec org.apache.hive:hive-serde com.esotericsoftware.kryo:kryo + com.twiter:parquet-hadoop-bundle org.apache.thrift:libthrift commons-lang:commons-lang org.json:json diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java index 6705ec4..cbd9e59 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -55,6 +55,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.history.HiveHistory.Keys; import org.apache.hadoop.hive.ql.hooks.Entity; +import org.apache.hadoop.hive.ql.hooks.Entity.Type; import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext; import org.apache.hadoop.hive.ql.hooks.Hook; import org.apache.hadoop.hive.ql.hooks.HookContext; @@ -185,6 +186,7 @@ private void createLockManager() throws SemanticException { } } + @Override public void init() { 
Operator.resetId(); } @@ -728,7 +730,7 @@ private void doAuthorizationV2(SessionState ss, HiveOperation op, HashSet hiveLocks) { perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.RELEASE_LOCKS); } + @Override public CommandProcessorResponse run(String command) throws CommandNeedRetryException { return run(command, false); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 32831fa..1f41a4e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -657,7 +657,7 @@ private int showGrantsV2(ShowGrantDesc showGrantDesc) throws HiveException { //only grantInfo is used HiveObjectPrivilege thriftObjectPriv = new HiveObjectPrivilege(new HiveObjectRef( AuthorizationUtils.getThriftHiveObjType(privObj.getType()),privObj.getDbname(), - privObj.getTableviewname(),null,null), principal.getName(), + privObj.getTableviewname(),null,null), principal.getName(), AuthorizationUtils.getThriftPrincipalType(principal.getType()), grantInfo); privList.add(thriftObjectPriv); } @@ -970,6 +970,17 @@ private int roleDDLV2(RoleDDLDesc roleDDLDesc) throws HiveException, IOException List allRoles = authorizer.getAllRoles(); writeListToFile(allRoles, roleDDLDesc.getResFile()); break; + case SHOW_CURRENT_ROLE: + List currentRoles = authorizer.getCurrentRoles(); + List roleNames = new ArrayList(currentRoles.size()); + for (HiveRole role : currentRoles) { + roleNames.add(role.getRoleName()); + } + writeListToFile(roleNames, roleDDLDesc.getResFile()); + break; + case SET_ROLE: + authorizer.setCurrentRole(roleDDLDesc.getName()); + break; default: throw new HiveException("Unkown role operation " + operation.getOperationName()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index ed7787d..fe5512f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -27,13 +27,20 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Properties; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.client.HConnection; +import org.apache.hadoop.hbase.client.HConnectionManager; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles; import org.apache.hadoop.hive.common.HiveStatsUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.MetaStoreUtils; @@ -57,6 +64,7 @@ import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; +import org.apache.hadoop.hive.ql.plan.HBaseCompleteBulkLoadDesc; import org.apache.hadoop.hive.ql.plan.LoadFileDesc; import org.apache.hadoop.hive.ql.plan.LoadMultiFilesDesc; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; @@ -195,6 +203,20 @@ private void releaseLocks(LoadTableDesc ltd) throws HiveException { } } + private void completeBulkLoad(Path sourcePath, String targetTable, Configuration conf) throws Exception { + 
LoadIncrementalHFiles loadIncrementalHFiles = new LoadIncrementalHFiles(conf); + HConnection conn = null; + HTable table = null; + try { + conn = HConnectionManager.createConnection(conf); + table = (HTable) conn.getTable(targetTable); + loadIncrementalHFiles.doBulkLoad(sourcePath, table); + } finally { + if (table != null) table.close(); + if (conn != null) conn.close(); + } + } + @Override public int execute(DriverContext driverContext) { @@ -428,6 +450,41 @@ public int execute(DriverContext driverContext) { releaseLocks(tbd); } + // for HFiles + HBaseCompleteBulkLoadDesc cbld = work.getCompleteBulkLoadWork(); + if (cbld != null) { + // lookup hfile.family.path. Duplicated from HiveHFileOutputFormat#getFamilyPath + Configuration conf = driverContext.getCtx().getConf(); + Properties tableProps = cbld.getTable().getProperties(); + Path columnFamilyPath = new Path(conf.get("hfile.family.path", tableProps.getProperty("hfile.family.path"))); + Path sourcePath = columnFamilyPath.getParent(); + // TODO: assert hfile.family.path is a directory of HFiles + assert sourcePath.getFileSystem(driverContext.getCtx().getConf()).isDirectory(sourcePath) : sourcePath + " is not a directory."; + + String tableName = tableProps.getProperty("hbase.table.name" /* HBaseSerDe#HBASE_TABLE_NAME */); + conf = HBaseConfiguration.create(conf); + console.printInfo("Registering HFiles with RegionServers: " + sourcePath + " => " + tableName); + completeBulkLoad(sourcePath, tableName, conf); + + // after bulkload, all hfiles should be gone + FileSystem fs = columnFamilyPath.getFileSystem(conf); + FileStatus[] files = fs.listStatus(columnFamilyPath); + if (files == null || files.length == 0) { + // bulkload succeeded. Clean up empty column family directory. + fs.delete(columnFamilyPath, true); + } else { + // bulkload failed. report abandoned files. + long totalSize = 0; + for (FileStatus f : files) { + totalSize += f.getLen(); + } + String msg = "Failed to bulkload all HFiles in " + columnFamilyPath + ". 
Roughly " + + StringUtils.humanReadableInt(totalSize) + "bytes abandoned."; + console.printError("HFiles remain; registration failed!", msg); + return 1; + } + } + return 0; } catch (Exception e) { console.printError("Failed with exception " + e.getMessage(), "\n" diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java index 597358a..43221d5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java @@ -94,6 +94,9 @@ public int execute(DriverContext driverContext) { if (work.getLoadFileDesc() != null) { workComponentsPresent++; } + if (work.getCompleteBulkLoadDesc() != null) { + workComponentsPresent++; + } assert (workComponentsPresent == 1); @@ -101,8 +104,10 @@ public int execute(DriverContext driverContext) { try { if (work.getLoadTableDesc() != null) { tableName = work.getLoadTableDesc().getTable().getTableName(); - } else if (work.getTableSpecs() != null){ + } else if (work.getTableSpecs() != null) { tableName = work.getTableSpecs().tableName; + } else if (work.getCompleteBulkLoadDesc() != null) { + tableName = work.getCompleteBulkLoadDesc().getTable().getTableName(); } else { tableName = work.getLoadFileDesc().getDestinationCreateTable(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java b/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java new file mode 100644 index 0000000..4131066 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io; + +public final class IOConstants { + public static final String COLUMNS = "columns"; + public static final String COLUMNS_TYPES = "columns.types"; + public static final String MAPRED_TASK_ID = "mapred.task.id"; + + private IOConstants() { + // prevent instantiation + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java new file mode 100644 index 0000000..d3412df --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java @@ -0,0 +1,56 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet; + +import java.io.IOException; + +import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport; +import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.RecordReader; + +import parquet.hadoop.ParquetInputFormat; + + +/** + * + * A Parquet InputFormat for Hive (with the deprecated package mapred) + * + */ +public class MapredParquetInputFormat extends FileInputFormat { + + private final ParquetInputFormat realInput; + + public MapredParquetInputFormat() { + this(new ParquetInputFormat(DataWritableReadSupport.class)); + } + + protected MapredParquetInputFormat(final ParquetInputFormat inputFormat) { + this.realInput = inputFormat; + } + + @Override + public org.apache.hadoop.mapred.RecordReader getRecordReader( + final org.apache.hadoop.mapred.InputSplit split, + final org.apache.hadoop.mapred.JobConf job, + final org.apache.hadoop.mapred.Reporter reporter + ) throws IOException { + try { + return (RecordReader) new ParquetRecordReaderWrapper(realInput, split, job, reporter); + } catch (final InterruptedException e) { + throw new RuntimeException("Cannot create a RecordReaderWrapper", e); + } + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java new file mode 100644 index 0000000..b87c673 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java @@ -0,0 +1,125 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.parquet; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.io.FSRecordWriter; +import org.apache.hadoop.hive.ql.io.HiveOutputFormat; +import org.apache.hadoop.hive.ql.io.IOConstants; +import org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter; +import org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport; +import org.apache.hadoop.hive.ql.io.parquet.write.ParquetRecordWriterWrapper; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapred.FileOutputFormat; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordWriter; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.hadoop.util.Progressable; + +import parquet.hadoop.ParquetOutputFormat; + +/** + * + * A Parquet OutputFormat for Hive (with the deprecated package mapred) + * + */ +public class MapredParquetOutputFormat extends FileOutputFormat implements + HiveOutputFormat { + + private static final Log LOG = LogFactory.getLog(MapredParquetOutputFormat.class); + + protected ParquetOutputFormat realOutputFormat; + + public MapredParquetOutputFormat() { + realOutputFormat = new ParquetOutputFormat(new DataWritableWriteSupport()); + } + + public MapredParquetOutputFormat(final OutputFormat mapreduceOutputFormat) { + realOutputFormat = (ParquetOutputFormat) mapreduceOutputFormat; + } + + @Override + public void checkOutputSpecs(final FileSystem ignored, final JobConf job) throws IOException { + realOutputFormat.checkOutputSpecs(ShimLoader.getHadoopShims().getHCatShim().createJobContext(job, null)); + } + + @Override + public RecordWriter getRecordWriter( + final FileSystem ignored, + final JobConf job, + final String name, + final Progressable progress + ) throws IOException { + throw new RuntimeException("Should never be used"); + } + + /** + * + * Create the parquet schema from the hive schema, and return the RecordWriterWrapper which + * contains the real output format + */ + @Override + public FSRecordWriter getHiveRecordWriter( + final JobConf jobConf, + final Path finalOutPath, + final Class valueClass, + final boolean isCompressed, + final Properties tableProperties, + final Progressable progress) throws IOException { + + LOG.info("creating new record writer..." 
+ this); + + final String columnNameProperty = tableProperties.getProperty(IOConstants.COLUMNS); + final String columnTypeProperty = tableProperties.getProperty(IOConstants.COLUMNS_TYPES); + List columnNames; + List columnTypes; + + if (columnNameProperty.length() == 0) { + columnNames = new ArrayList(); + } else { + columnNames = Arrays.asList(columnNameProperty.split(",")); + } + + if (columnTypeProperty.length() == 0) { + columnTypes = new ArrayList(); + } else { + columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + } + + DataWritableWriteSupport.setSchema(HiveSchemaConverter.convert(columnNames, columnTypes), jobConf); + return getParquerRecordWriterWrapper(realOutputFormat, jobConf, finalOutPath.toString(), progress); + } + + protected ParquetRecordWriterWrapper getParquerRecordWriterWrapper( + ParquetOutputFormat realOutputFormat, + JobConf jobConf, + String finalOutPath, + Progressable progress + ) throws IOException { + return new ParquetRecordWriterWrapper(realOutputFormat, jobConf, finalOutPath.toString(), progress); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java new file mode 100644 index 0000000..2f155f6 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java @@ -0,0 +1,152 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet; + +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.hive.ql.plan.TableScanDesc; +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; +import org.apache.hadoop.mapred.JobConf; + +public class ProjectionPusher { + + private static final Log LOG = LogFactory.getLog(ProjectionPusher.class); + + private final Map pathToPartitionInfo = + new LinkedHashMap(); + /** + * MapWork is the Hive object which describes input files, + * columns projections, and filters. + */ + private MapWork mapWork; + + /** + * Sets the mapWork variable based on the current JobConf in order to get all partitions. 
+ * + * @param job + */ + private void updateMrWork(final JobConf job) { + final String plan = HiveConf.getVar(job, HiveConf.ConfVars.PLAN); + if (mapWork == null && plan != null && plan.length() > 0) { + mapWork = Utilities.getMapWork(job); + pathToPartitionInfo.clear(); + for (final Map.Entry entry : mapWork.getPathToPartitionInfo().entrySet()) { + // key contains scheme (such as pfile://) and we want only the path portion fix in HIVE-6366 + pathToPartitionInfo.put(new Path(entry.getKey()).toUri().getPath(), entry.getValue()); + } + } + } + + private void pushProjectionsAndFilters(final JobConf jobConf, + final String splitPath, final String splitPathWithNoSchema) { + + if (mapWork == null) { + return; + } else if (mapWork.getPathToAliases() == null) { + return; + } + + final ArrayList aliases = new ArrayList(); + final Iterator>> iterator = mapWork.getPathToAliases().entrySet().iterator(); + + while (iterator.hasNext()) { + final Entry> entry = iterator.next(); + final String key = new Path(entry.getKey()).toUri().getPath(); + if (splitPath.equals(key) || splitPathWithNoSchema.equals(key)) { + final ArrayList list = entry.getValue(); + for (final String val : list) { + aliases.add(val); + } + } + } + + for (final String alias : aliases) { + final Operator op = mapWork.getAliasToWork().get( + alias); + if (op != null && op instanceof TableScanOperator) { + final TableScanOperator tableScan = (TableScanOperator) op; + + // push down projections + final List list = tableScan.getNeededColumnIDs(); + + if (list != null) { + ColumnProjectionUtils.appendReadColumnIDs(jobConf, list); + } else { + ColumnProjectionUtils.setFullyReadColumns(jobConf); + } + + pushFilters(jobConf, tableScan); + } + } + } + + private void pushFilters(final JobConf jobConf, final TableScanOperator tableScan) { + + final TableScanDesc scanDesc = tableScan.getConf(); + if (scanDesc == null) { + LOG.debug("Not pushing filters because TableScanDesc is null"); + return; + } + + // construct column name list for reference by filter push down + Utilities.setColumnNameList(jobConf, tableScan); + + // push down filters + final ExprNodeGenericFuncDesc filterExpr = scanDesc.getFilterExpr(); + if (filterExpr == null) { + LOG.debug("Not pushing filters because FilterExpr is null"); + return; + } + + final String filterText = filterExpr.getExprString(); + final String filterExprSerialized = Utilities.serializeExpression(filterExpr); + jobConf.set( + TableScanDesc.FILTER_TEXT_CONF_STR, + filterText); + jobConf.set( + TableScanDesc.FILTER_EXPR_CONF_STR, + filterExprSerialized); + } + + + public JobConf pushProjectionsAndFilters(JobConf jobConf, Path path) + throws IOException { + updateMrWork(jobConf); // TODO: refactor this in HIVE-6366 + final JobConf cloneJobConf = new JobConf(jobConf); + final PartitionDesc part = pathToPartitionInfo.get(path.toString()); + + if ((part != null) && (part.getTableDesc() != null)) { + Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf); + } + pushProjectionsAndFilters(cloneJobConf, path.toString(), path.toUri().toString()); + return cloneJobConf; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java new file mode 100644 index 0000000..582a5df --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java @@ -0,0 +1,85 @@ +/** + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.convert; + +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.Writable; + +import parquet.io.ParquetDecodingException; +import parquet.io.api.Converter; +import parquet.schema.GroupType; + +public class ArrayWritableGroupConverter extends HiveGroupConverter { + + private final Converter[] converters; + private final HiveGroupConverter parent; + private final int index; + private final boolean isMap; + private Writable currentValue; + private Writable[] mapPairContainer; + + public ArrayWritableGroupConverter(final GroupType groupType, final HiveGroupConverter parent, + final int index) { + this.parent = parent; + this.index = index; + int count = groupType.getFieldCount(); + if (count < 1 || count > 2) { + throw new IllegalStateException("Field count must be either 1 or 2: " + count); + } + isMap = count == 2; + converters = new Converter[count]; + for (int i = 0; i < count; i++) { + converters[i] = getConverterFromDescription(groupType.getType(i), i, this); + } + } + + @Override + public Converter getConverter(final int fieldIndex) { + return converters[fieldIndex]; + } + + @Override + public void start() { + if (isMap) { + mapPairContainer = new Writable[2]; + } + } + + @Override + public void end() { + if (isMap) { + currentValue = new ArrayWritable(Writable.class, mapPairContainer); + } + parent.add(index, currentValue); + } + + @Override + protected void set(final int index, final Writable value) { + if (index != 0 && mapPairContainer == null || index > 1) { + throw new ParquetDecodingException("Repeated group can only have one or two fields for maps." + + " Not allowed to set for the index : " + index); + } + + if (isMap) { + mapPairContainer[index] = value; + } else { + currentValue = value; + } + } + + @Override + protected void add(final int index, final Writable value) { + set(index, value); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java new file mode 100644 index 0000000..0e310fb --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java @@ -0,0 +1,140 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.parquet.convert; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.Writable; + +import parquet.io.api.Converter; +import parquet.schema.GroupType; +import parquet.schema.Type; + +/** + * + * A MapWritableGroupConverter, real converter between hive and parquet types recursively for complex types. + * + */ +public class DataWritableGroupConverter extends HiveGroupConverter { + + private final Converter[] converters; + private final HiveGroupConverter parent; + private final int index; + private final Object[] currentArr; + private Writable[] rootMap; + + public DataWritableGroupConverter(final GroupType requestedSchema, final GroupType tableSchema) { + this(requestedSchema, null, 0, tableSchema); + final int fieldCount = tableSchema.getFieldCount(); + this.rootMap = new Writable[fieldCount]; + } + + public DataWritableGroupConverter(final GroupType groupType, final HiveGroupConverter parent, + final int index) { + this(groupType, parent, index, groupType); + } + + public DataWritableGroupConverter(final GroupType selectedGroupType, + final HiveGroupConverter parent, final int index, final GroupType containingGroupType) { + this.parent = parent; + this.index = index; + final int totalFieldCount = containingGroupType.getFieldCount(); + final int selectedFieldCount = selectedGroupType.getFieldCount(); + + currentArr = new Object[totalFieldCount]; + converters = new Converter[selectedFieldCount]; + + List selectedFields = selectedGroupType.getFields(); + for (int i = 0; i < selectedFieldCount; i++) { + Type subtype = selectedFields.get(i); + if (containingGroupType.getFields().contains(subtype)) { + converters[i] = getConverterFromDescription(subtype, + containingGroupType.getFieldIndex(subtype.getName()), this); + } else { + throw new IllegalStateException("Group type [" + containingGroupType + + "] does not contain requested field: " + subtype); + } + } + } + + public final ArrayWritable getCurrentArray() { + final Writable[] writableArr; + if (this.rootMap != null) { // We're at the root : we can safely re-use the same map to save perf + writableArr = this.rootMap; + } else { + writableArr = new Writable[currentArr.length]; + } + + for (int i = 0; i < currentArr.length; i++) { + final Object obj = currentArr[i]; + if (obj instanceof List) { + final List objList = (List)obj; + final ArrayWritable arr = new ArrayWritable(Writable.class, + objList.toArray(new Writable[objList.size()])); + writableArr[i] = arr; + } else { + writableArr[i] = (Writable) obj; + } + } + return new ArrayWritable(Writable.class, writableArr); + } + + @Override + final protected void set(final int index, final Writable value) { + currentArr[index] = value; + } + + @Override + public Converter getConverter(final int fieldIndex) { + return converters[fieldIndex]; + } + + @Override + public void start() { + for (int i = 0; i < currentArr.length; i++) { + currentArr[i] = null; + } + } + + @Override + public void end() { + if (parent != null) { + parent.set(index, getCurrentArray()); + } + } + + @Override + protected void add(final int index, final Writable value) { + if (currentArr[index] != null) { + final Object obj = currentArr[index]; + if (obj instanceof List) { + final List list = (List) obj; + list.add(value); + } else { + throw new IllegalStateException("This should be a List: " + obj); + } + } else { + // create a list here because we don't know the final length of the object + // and it is 
more flexible than ArrayWritable. + // + // converted to ArrayWritable by getCurrentArray(). + final List buffer = new ArrayList(); + buffer.add(value); + currentArr[index] = (Object) buffer; + } + + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java new file mode 100644 index 0000000..7762afe --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java @@ -0,0 +1,44 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.convert; + +import org.apache.hadoop.io.ArrayWritable; + +import parquet.io.api.GroupConverter; +import parquet.io.api.RecordMaterializer; +import parquet.schema.GroupType; + +/** + * + * A MapWritableReadSupport, encapsulates the tuples + * + */ +public class DataWritableRecordConverter extends RecordMaterializer { + + private final DataWritableGroupConverter root; + + public DataWritableRecordConverter(final GroupType requestedSchema, final GroupType tableSchema) { + this.root = new DataWritableGroupConverter(requestedSchema, tableSchema); + } + + @Override + public ArrayWritable getCurrentRecord() { + return root.getCurrentArray(); + } + + @Override + public GroupConverter getRootConverter() { + return root; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java new file mode 100644 index 0000000..f7b9668 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java @@ -0,0 +1,160 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.convert; + +import java.math.BigDecimal; + +import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable; +import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable.DicBinaryWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; + +import parquet.column.Dictionary; +import parquet.io.api.Binary; +import parquet.io.api.Converter; +import parquet.io.api.PrimitiveConverter; + +/** + * + * ETypeConverter is an easy way to set the converter for the right type. 
+ * + */ +public enum ETypeConverter { + + EDOUBLE_CONVERTER(Double.TYPE) { + @Override + Converter getConverter(final Class type, final int index, final HiveGroupConverter parent) { + return new PrimitiveConverter() { + @Override + public void addDouble(final double value) { + parent.set(index, new DoubleWritable(value)); + } + }; + } + }, + EBOOLEAN_CONVERTER(Boolean.TYPE) { + @Override + Converter getConverter(final Class type, final int index, final HiveGroupConverter parent) { + return new PrimitiveConverter() { + @Override + public void addBoolean(final boolean value) { + parent.set(index, new BooleanWritable(value)); + } + }; + } + }, + EFLOAT_CONVERTER(Float.TYPE) { + @Override + Converter getConverter(final Class type, final int index, final HiveGroupConverter parent) { + return new PrimitiveConverter() { + @Override + public void addFloat(final float value) { + parent.set(index, new FloatWritable(value)); + } + }; + } + }, + EINT32_CONVERTER(Integer.TYPE) { + @Override + Converter getConverter(final Class type, final int index, final HiveGroupConverter parent) { + return new PrimitiveConverter() { + @Override + public void addInt(final int value) { + parent.set(index, new IntWritable(value)); + } + }; + } + }, + EINT64_CONVERTER(Long.TYPE) { + @Override + Converter getConverter(final Class type, final int index, final HiveGroupConverter parent) { + return new PrimitiveConverter() { + @Override + public void addLong(final long value) { + parent.set(index, new LongWritable(value)); + } + }; + } + }, + EINT96_CONVERTER(BigDecimal.class) { + @Override + Converter getConverter(final Class type, final int index, final HiveGroupConverter parent) { + return new PrimitiveConverter() { + // TODO in HIVE-6367 decimal should not be treated as a double + @Override + public void addDouble(final double value) { + parent.set(index, new DoubleWritable(value)); + } + }; + } + }, + EBINARY_CONVERTER(Binary.class) { + @Override + Converter getConverter(final Class type, final int index, final HiveGroupConverter parent) { + return new PrimitiveConverter() { + private Binary[] dictBinary; + private String[] dict; + + @Override + public boolean hasDictionarySupport() { + return true; + } + + @Override + public void setDictionary(Dictionary dictionary) { + dictBinary = new Binary[dictionary.getMaxId() + 1]; + dict = new String[dictionary.getMaxId() + 1]; + for (int i = 0; i <= dictionary.getMaxId(); i++) { + Binary binary = dictionary.decodeToBinary(i); + dictBinary[i] = binary; + dict[i] = binary.toStringUsingUTF8(); + } + } + + @Override + public void addValueFromDictionary(int dictionaryId) { + parent.set(index, new DicBinaryWritable(dictBinary[dictionaryId], dict[dictionaryId])); + } + + @Override + public void addBinary(Binary value) { + parent.set(index, new BinaryWritable(value)); + } + }; + } + }; + final Class _type; + + private ETypeConverter(final Class type) { + this._type = type; + } + + private Class getType() { + return _type; + } + + abstract Converter getConverter(final Class type, final int index, final HiveGroupConverter parent); + + public static Converter getNewConverter(final Class type, final int index, final HiveGroupConverter parent) { + for (final ETypeConverter eConverter : values()) { + if (eConverter.getType() == type) { + return eConverter.getConverter(type, index, parent); + } + } + throw new IllegalArgumentException("Converter not found ... 
for type : " + type); + } +} \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java new file mode 100644 index 0000000..20c8445 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java @@ -0,0 +1,46 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.convert; + +import org.apache.hadoop.io.Writable; + +import parquet.io.api.Converter; +import parquet.io.api.GroupConverter; +import parquet.schema.Type; +import parquet.schema.Type.Repetition; + +public abstract class HiveGroupConverter extends GroupConverter { + + protected static Converter getConverterFromDescription(final Type type, final int index, + final HiveGroupConverter parent) { + if (type == null) { + return null; + } + if (type.isPrimitive()) { + return ETypeConverter.getNewConverter(type.asPrimitiveType().getPrimitiveTypeName().javaType, + index, parent); + } else { + if (type.asGroupType().getRepetition() == Repetition.REPEATED) { + return new ArrayWritableGroupConverter(type.asGroupType(), parent, index); + } else { + return new DataWritableGroupConverter(type.asGroupType(), parent, index); + } + } + } + + protected abstract void set(int index, Writable value); + + protected abstract void add(int index, Writable value); + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java new file mode 100644 index 0000000..b5e9c8b --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java @@ -0,0 +1,129 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.parquet.convert; + +import java.util.List; + +import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; + +import parquet.schema.GroupType; +import parquet.schema.MessageType; +import parquet.schema.OriginalType; +import parquet.schema.PrimitiveType; +import parquet.schema.PrimitiveType.PrimitiveTypeName; +import parquet.schema.Type; +import parquet.schema.Type.Repetition; + +public class HiveSchemaConverter { + + public static MessageType convert(final List columnNames, final List columnTypes) { + final MessageType schema = new MessageType("hive_schema", convertTypes(columnNames, columnTypes)); + return schema; + } + + private static Type[] convertTypes(final List columnNames, final List columnTypes) { + if (columnNames.size() != columnTypes.size()) { + throw new IllegalStateException("Mismatched Hive columns and types. Hive columns names" + + " found : " + columnNames + " . And Hive types found : " + columnTypes); + } + final Type[] types = new Type[columnNames.size()]; + for (int i = 0; i < columnNames.size(); ++i) { + types[i] = convertType(columnNames.get(i), columnTypes.get(i)); + } + return types; + } + + private static Type convertType(final String name, final TypeInfo typeInfo) { + return convertType(name, typeInfo, Repetition.OPTIONAL); + } + + private static Type convertType(final String name, final TypeInfo typeInfo, final Repetition repetition) { + if (typeInfo.getCategory().equals(Category.PRIMITIVE)) { + if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) { + return new PrimitiveType(repetition, PrimitiveTypeName.BINARY, name); + } else if (typeInfo.equals(TypeInfoFactory.intTypeInfo) || + typeInfo.equals(TypeInfoFactory.shortTypeInfo) || + typeInfo.equals(TypeInfoFactory.byteTypeInfo)) { + return new PrimitiveType(repetition, PrimitiveTypeName.INT32, name); + } else if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) { + return new PrimitiveType(repetition, PrimitiveTypeName.INT64, name); + } else if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) { + return new PrimitiveType(repetition, PrimitiveTypeName.DOUBLE, name); + } else if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) { + return new PrimitiveType(repetition, PrimitiveTypeName.FLOAT, name); + } else if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) { + return new PrimitiveType(repetition, PrimitiveTypeName.BOOLEAN, name); + } else if (typeInfo.equals(TypeInfoFactory.binaryTypeInfo)) { + // TODO : binaryTypeInfo is a byte array. 
Need to map it + throw new UnsupportedOperationException("Binary type not implemented"); + } else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) { + throw new UnsupportedOperationException("Timestamp type not implemented"); + } else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) { + throw new UnsupportedOperationException("Void type not implemented"); + } else if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) { + throw new UnsupportedOperationException("Unknown type not implemented"); + } else { + throw new IllegalArgumentException("Unknown type: " + typeInfo); + } + } else if (typeInfo.getCategory().equals(Category.LIST)) { + return convertArrayType(name, (ListTypeInfo) typeInfo); + } else if (typeInfo.getCategory().equals(Category.STRUCT)) { + return convertStructType(name, (StructTypeInfo) typeInfo); + } else if (typeInfo.getCategory().equals(Category.MAP)) { + return convertMapType(name, (MapTypeInfo) typeInfo); + } else if (typeInfo.getCategory().equals(Category.UNION)) { + throw new UnsupportedOperationException("Union type not implemented"); + } else { + throw new IllegalArgumentException("Unknown type: " + typeInfo); + } + } + + // An optional group containing a repeated anonymous group "bag", containing + // 1 anonymous element "array_element" + private static GroupType convertArrayType(final String name, final ListTypeInfo typeInfo) { + final TypeInfo subType = typeInfo.getListElementTypeInfo(); + return listWrapper(name, OriginalType.LIST, new GroupType(Repetition.REPEATED, + ParquetHiveSerDe.ARRAY.toString(), convertType("array_element", subType))); + } + + // An optional group containing multiple elements + private static GroupType convertStructType(final String name, final StructTypeInfo typeInfo) { + final List columnNames = typeInfo.getAllStructFieldNames(); + final List columnTypes = typeInfo.getAllStructFieldTypeInfos(); + return new GroupType(Repetition.OPTIONAL, name, convertTypes(columnNames, columnTypes)); + + } + + // An optional group containing a repeated anonymous group "map", containing + // 2 elements: "key", "value" + private static GroupType convertMapType(final String name, final MapTypeInfo typeInfo) { + final Type keyType = convertType(ParquetHiveSerDe.MAP_KEY.toString(), + typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED); + final Type valueType = convertType(ParquetHiveSerDe.MAP_VALUE.toString(), + typeInfo.getMapValueTypeInfo()); + return listWrapper(name, OriginalType.MAP_KEY_VALUE, + new GroupType(Repetition.REPEATED, ParquetHiveSerDe.MAP.toString(), keyType, valueType)); + } + + private static GroupType listWrapper(final String name, final OriginalType originalType, + final GroupType groupType) { + return new GroupType(Repetition.OPTIONAL, name, originalType, groupType); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java new file mode 100644 index 0000000..9087307 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java @@ -0,0 +1,127 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.read; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.io.IOConstants; +import org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableRecordConverter; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.util.StringUtils; + +import parquet.hadoop.api.ReadSupport; +import parquet.io.api.RecordMaterializer; +import parquet.schema.MessageType; +import parquet.schema.MessageTypeParser; +import parquet.schema.PrimitiveType; +import parquet.schema.PrimitiveType.PrimitiveTypeName; +import parquet.schema.Type; +import parquet.schema.Type.Repetition; + +/** + * + * A DataWritableReadSupport + * + * Manages the translation between Hive and Parquet + * + */ +public class DataWritableReadSupport extends ReadSupport { + + private static final String TABLE_SCHEMA = "table_schema"; + public static final String HIVE_SCHEMA_KEY = "HIVE_TABLE_SCHEMA"; + + /** + * From a comma separated string of column names (including Hive virtual columns), return the list + * of column names + * + * @param columns comma separated list of column names + * @return list with virtual columns removed + */ + private static List getColumns(final String columns) { + return (List) VirtualColumn. + removeVirtualColumns(StringUtils.getStringCollection(columns)); + } + /** + * + * It creates the ReadContext for the Parquet side with the requested schema during the init phase.
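+ * The Hive table schema (restricted to the columns actually present in the file) is stored in the read support metadata under HIVE_SCHEMA_KEY, and the requested schema is narrowed to the column indexes supplied by ColumnProjectionUtils.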
+ * + * @param configuration needed to get the wanted columns + * @param keyValueMetaData // unused + * @param fileSchema parquet file schema + * @return the parquet ReadContext + */ + @Override + public parquet.hadoop.api.ReadSupport.ReadContext init(final Configuration configuration, + final Map keyValueMetaData, final MessageType fileSchema) { + final String columns = configuration.get(IOConstants.COLUMNS); + final Map contextMetadata = new HashMap(); + if (columns != null) { + final List listColumns = getColumns(columns); + + final List typeListTable = new ArrayList(); + for (final String col : listColumns) { + // listColumns contains partition columns which are metadata only + if (fileSchema.containsField(col)) { + typeListTable.add(fileSchema.getType(col)); + } + } + MessageType tableSchema = new MessageType(TABLE_SCHEMA, typeListTable); + contextMetadata.put(HIVE_SCHEMA_KEY, tableSchema.toString()); + + MessageType requestedSchemaByUser = tableSchema; + final List indexColumnsWanted = ColumnProjectionUtils.getReadColumnIDs(configuration); + + final List typeListWanted = new ArrayList(); + for (final Integer idx : indexColumnsWanted) { + typeListWanted.add(tableSchema.getType(listColumns.get(idx))); + } + requestedSchemaByUser = new MessageType(fileSchema.getName(), typeListWanted); + + return new ReadContext(requestedSchemaByUser, contextMetadata); + } else { + contextMetadata.put(HIVE_SCHEMA_KEY, fileSchema.toString()); + return new ReadContext(fileSchema, contextMetadata); + } + } + + /** + * + * It creates the hive read support to interpret data from parquet to hive + * + * @param configuration // unused + * @param keyValueMetaData + * @param fileSchema // unused + * @param readContext containing the requested schema and the schema of the hive table + * @return Record Materialize for Hive + */ + @Override + public RecordMaterializer prepareForRead(final Configuration configuration, + final Map keyValueMetaData, final MessageType fileSchema, + final parquet.hadoop.api.ReadSupport.ReadContext readContext) { + final Map metadata = readContext.getReadSupportMetadata(); + if (metadata == null) { + throw new IllegalStateException("ReadContext not initialized properly. " + + "Don't know the Hive Schema."); + } + final MessageType tableSchema = MessageTypeParser. + parseMessageType(metadata.get(HIVE_SCHEMA_KEY)); + return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java new file mode 100644 index 0000000..e1a7a48 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java @@ -0,0 +1,236 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.parquet.read; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.io.IOConstants; +import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; + +import parquet.hadoop.ParquetFileReader; +import parquet.hadoop.ParquetInputFormat; +import parquet.hadoop.ParquetInputSplit; +import parquet.hadoop.api.ReadSupport.ReadContext; +import parquet.hadoop.metadata.BlockMetaData; +import parquet.hadoop.metadata.FileMetaData; +import parquet.hadoop.metadata.ParquetMetadata; +import parquet.hadoop.util.ContextUtil; +import parquet.schema.MessageTypeParser; + +public class ParquetRecordReaderWrapper implements RecordReader { + public static final Log LOG = LogFactory.getLog(ParquetRecordReaderWrapper.class); + + private final long splitLen; // for getPos() + + private org.apache.hadoop.mapreduce.RecordReader realReader; + // expect readReader return same Key & Value objects (common case) + // this avoids extra serialization & deserialization of these objects + private ArrayWritable valueObj = null; + private boolean firstRecord = false; + private boolean eof = false; + private int schemaSize; + + private final ProjectionPusher projectionPusher; + + public ParquetRecordReaderWrapper( + final ParquetInputFormat newInputFormat, + final InputSplit oldSplit, + final JobConf oldJobConf, + final Reporter reporter) + throws IOException, InterruptedException { + this(newInputFormat, oldSplit, oldJobConf, reporter, new ProjectionPusher()); + } + + public ParquetRecordReaderWrapper( + final ParquetInputFormat newInputFormat, + final InputSplit oldSplit, + final JobConf oldJobConf, + final Reporter reporter, + final ProjectionPusher pusher) + throws IOException, InterruptedException { + this.splitLen = oldSplit.getLength(); + this.projectionPusher = pusher; + + final ParquetInputSplit split = getSplit(oldSplit, oldJobConf); + + TaskAttemptID taskAttemptID = TaskAttemptID.forName(oldJobConf.get(IOConstants.MAPRED_TASK_ID)); + if (taskAttemptID == null) { + taskAttemptID = new TaskAttemptID(); + } + + // create a TaskInputOutputContext + final TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(oldJobConf, taskAttemptID); + + if (split != null) { + try { + realReader = newInputFormat.createRecordReader(split, taskContext); + realReader.initialize(split, taskContext); + + // read once to gain access to key and value objects + if (realReader.nextKeyValue()) { + firstRecord = true; + valueObj = realReader.getCurrentValue(); + } else { + eof = true; + } + } catch (final InterruptedException e) { + throw new IOException(e); + } + } else { + realReader = null; + eof = true; + if (valueObj == null) { // Should initialize the value for createValue + valueObj = new ArrayWritable(Writable.class, new Writable[schemaSize]); + } + } + } + + @Override + public void close() throws IOException { + if (realReader != null) { + realReader.close(); + } + } + + @Override + public Void 
createKey() { + return null; + } + + @Override + public ArrayWritable createValue() { + return valueObj; + } + + @Override + public long getPos() throws IOException { + return (long) (splitLen * getProgress()); + } + + @Override + public float getProgress() throws IOException { + if (realReader == null) { + return 1f; + } else { + try { + return realReader.getProgress(); + } catch (final InterruptedException e) { + throw new IOException(e); + } + } + } + + @Override + public boolean next(final Void key, final ArrayWritable value) throws IOException { + if (eof) { + return false; + } + try { + if (firstRecord) { // key & value are already read. + firstRecord = false; + } else if (!realReader.nextKeyValue()) { + eof = true; // strictly not required, just for consistency + return false; + } + + final ArrayWritable tmpCurValue = realReader.getCurrentValue(); + if (value != tmpCurValue) { + final Writable[] arrValue = value.get(); + final Writable[] arrCurrent = tmpCurValue.get(); + if (value != null && arrValue.length == arrCurrent.length) { + System.arraycopy(arrCurrent, 0, arrValue, 0, arrCurrent.length); + } else { + if (arrValue.length != arrCurrent.length) { + throw new IOException("DeprecatedParquetHiveInput : size of object differs. Value" + + " size : " + arrValue.length + ", Current Object size : " + arrCurrent.length); + } else { + throw new IOException("DeprecatedParquetHiveInput can not support RecordReaders that" + + " don't return same key & value & value is null"); + } + } + } + return true; + } catch (final InterruptedException e) { + throw new IOException(e); + } + } + + /** + * gets a ParquetInputSplit corresponding to a split given by Hive + * + * @param oldSplit The split given by Hive + * @param conf The JobConf of the Hive job + * @return a ParquetInputSplit corresponding to the oldSplit + * @throws IOException if the config cannot be enhanced or if the footer cannot be read from the file + */ + protected ParquetInputSplit getSplit( + final InputSplit oldSplit, + final JobConf conf + ) throws IOException { + ParquetInputSplit split; + if (oldSplit instanceof FileSplit) { + final Path finalPath = ((FileSplit) oldSplit).getPath(); + final JobConf cloneJob = projectionPusher.pushProjectionsAndFilters(conf, finalPath.getParent()); + + final ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(cloneJob, finalPath); + final List blocks = parquetMetadata.getBlocks(); + final FileMetaData fileMetaData = parquetMetadata.getFileMetaData(); + + final ReadContext readContext = new DataWritableReadSupport() + .init(cloneJob, fileMetaData.getKeyValueMetaData(), fileMetaData.getSchema()); + schemaSize = MessageTypeParser.parseMessageType(readContext.getReadSupportMetadata() + .get(DataWritableReadSupport.HIVE_SCHEMA_KEY)).getFieldCount(); + final List splitGroup = new ArrayList(); + final long splitStart = ((FileSplit) oldSplit).getStart(); + final long splitLength = ((FileSplit) oldSplit).getLength(); + for (final BlockMetaData block : blocks) { + final long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset(); + if (firstDataPage >= splitStart && firstDataPage < splitStart + splitLength) { + splitGroup.add(block); + } + } + if (splitGroup.isEmpty()) { + LOG.warn("Skipping split, could not find row group in: " + (FileSplit) oldSplit); + split = null; + } else { + split = new ParquetInputSplit(finalPath, + splitStart, + splitLength, + ((FileSplit) oldSplit).getLocations(), + splitGroup, + readContext.getRequestedSchema().toString(), + 
fileMetaData.getSchema().toString(), + fileMetaData.getKeyValueMetaData(), + readContext.getReadSupportMetadata()); + } + } else { + throw new IllegalArgumentException("Unknown split type: " + oldSplit); + } + return split; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/AbstractParquetMapInspector.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/AbstractParquetMapInspector.java new file mode 100644 index 0000000..1d72747 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/AbstractParquetMapInspector.java @@ -0,0 +1,163 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.serde; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.SettableMapObjectInspector; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.Writable; + +public abstract class AbstractParquetMapInspector implements SettableMapObjectInspector { + + protected final ObjectInspector keyInspector; + protected final ObjectInspector valueInspector; + + public AbstractParquetMapInspector(final ObjectInspector keyInspector, final ObjectInspector valueInspector) { + this.keyInspector = keyInspector; + this.valueInspector = valueInspector; + } + + @Override + public String getTypeName() { + return "map<" + keyInspector.getTypeName() + "," + valueInspector.getTypeName() + ">"; + } + + @Override + public Category getCategory() { + return Category.MAP; + } + + @Override + public ObjectInspector getMapKeyObjectInspector() { + return keyInspector; + } + + @Override + public ObjectInspector getMapValueObjectInspector() { + return valueInspector; + } + + @Override + public Map getMap(final Object data) { + if (data == null) { + return null; + } + + if (data instanceof ArrayWritable) { + final Writable[] mapContainer = ((ArrayWritable) data).get(); + + if (mapContainer == null || mapContainer.length == 0) { + return null; + } + + final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get(); + final Map map = new HashMap(); + + for (final Writable obj : mapArray) { + final ArrayWritable mapObj = (ArrayWritable) obj; + final Writable[] arr = mapObj.get(); + map.put(arr[0], arr[1]); + } + + return map; + } + + if (data instanceof Map) { + return (Map) data; + } + + throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName()); + } + + @Override + public int getMapSize(final Object data) { + if (data == null) { + return -1; + } + + if (data instanceof ArrayWritable) { + final Writable[] mapContainer = ((ArrayWritable) data).get(); + + if (mapContainer == null || mapContainer.length == 0) { + return -1; + } else { + return ((ArrayWritable) mapContainer[0]).get().length; + } + } + + if (data instanceof Map) { + return ((Map) data).size(); + } + + throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName()); + 
} + + @Override + public Object create() { + Map m = new HashMap(); + return m; + } + + @Override + public Object put(Object map, Object key, Object value) { + Map m = (HashMap) map; + m.put(key, value); + return m; + } + + @Override + public Object remove(Object map, Object key) { + Map m = (HashMap) map; + m.remove(key); + return m; + } + + @Override + public Object clear(Object map) { + Map m = (HashMap) map; + m.clear(); + return m; + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + // Cast to the shared base type so this equals() also works for DeepParquetHiveMapInspector instances. + final AbstractParquetMapInspector other = (AbstractParquetMapInspector) obj; + if (this.keyInspector != other.keyInspector && + (this.keyInspector == null || !this.keyInspector.equals(other.keyInspector))) { + return false; + } + if (this.valueInspector != other.valueInspector && + (this.valueInspector == null || !this.valueInspector.equals(other.valueInspector))) { + return false; + } + return true; + } + + @Override + public int hashCode() { + int hash = 7; + hash = 59 * hash + (this.keyInspector != null ? this.keyInspector.hashCode() : 0); + hash = 59 * hash + (this.valueInspector != null ? this.valueInspector.hashCode() : 0); + return hash; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java new file mode 100644 index 0000000..a2c7fe0 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java @@ -0,0 +1,222 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.serde; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; + +import org.apache.hadoop.hive.ql.io.parquet.serde.primitive.ParquetPrimitiveInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.ArrayWritable; + +/** + * + * The ArrayWritableObjectInspector will inspect an ArrayWritable, considering it as a Hive struct.
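+ * Struct fields are resolved positionally: each StructField keeps the index of its column in the table's StructTypeInfo, and that index is used to read the corresponding element of the backing ArrayWritable.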
+ * It can also inspect a List if Hive decides to inspect the result of an inspection. + * + */ +public class ArrayWritableObjectInspector extends SettableStructObjectInspector { + + private final TypeInfo typeInfo; + private final List fieldInfos; + private final List fieldNames; + private final List fields; + private final HashMap fieldsByName; + + public ArrayWritableObjectInspector(final StructTypeInfo rowTypeInfo) { + + typeInfo = rowTypeInfo; + fieldNames = rowTypeInfo.getAllStructFieldNames(); + fieldInfos = rowTypeInfo.getAllStructFieldTypeInfos(); + fields = new ArrayList(fieldNames.size()); + fieldsByName = new HashMap(); + + for (int i = 0; i < fieldNames.size(); ++i) { + final String name = fieldNames.get(i); + final TypeInfo fieldInfo = fieldInfos.get(i); + + final StructFieldImpl field = new StructFieldImpl(name, getObjectInspector(fieldInfo), i); + fields.add(field); + fieldsByName.put(name, field); + } + } + + private ObjectInspector getObjectInspector(final TypeInfo typeInfo) { + if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) { + return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; + } else if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) { + return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + } else if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) { + return PrimitiveObjectInspectorFactory.writableFloatObjectInspector; + } else if (typeInfo.equals(TypeInfoFactory.intTypeInfo)) { + return PrimitiveObjectInspectorFactory.writableIntObjectInspector; + } else if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) { + return PrimitiveObjectInspectorFactory.writableLongObjectInspector; + } else if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) { + return ParquetPrimitiveInspectorFactory.parquetStringInspector; + } else if (typeInfo.getCategory().equals(Category.STRUCT)) { + return new ArrayWritableObjectInspector((StructTypeInfo) typeInfo); + } else if (typeInfo.getCategory().equals(Category.LIST)) { + final TypeInfo subTypeInfo = ((ListTypeInfo) typeInfo).getListElementTypeInfo(); + return new ParquetHiveArrayInspector(getObjectInspector(subTypeInfo)); + } else if (typeInfo.getCategory().equals(Category.MAP)) { + final TypeInfo keyTypeInfo = ((MapTypeInfo) typeInfo).getMapKeyTypeInfo(); + final TypeInfo valueTypeInfo = ((MapTypeInfo) typeInfo).getMapValueTypeInfo(); + if (keyTypeInfo.equals(TypeInfoFactory.stringTypeInfo) || keyTypeInfo.equals(TypeInfoFactory.byteTypeInfo) + || keyTypeInfo.equals(TypeInfoFactory.shortTypeInfo)) { + return new DeepParquetHiveMapInspector(getObjectInspector(keyTypeInfo), getObjectInspector(valueTypeInfo)); + } else { + return new StandardParquetHiveMapInspector(getObjectInspector(keyTypeInfo), getObjectInspector(valueTypeInfo)); + } + } else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) { + throw new UnsupportedOperationException("timestamp not implemented yet"); + } else if (typeInfo.equals(TypeInfoFactory.byteTypeInfo)) { + return ParquetPrimitiveInspectorFactory.parquetByteInspector; + } else if (typeInfo.equals(TypeInfoFactory.shortTypeInfo)) { + return ParquetPrimitiveInspectorFactory.parquetShortInspector; + } else { + throw new IllegalArgumentException("Unknown field info: " + typeInfo); + } + + } + + @Override + public Category getCategory() { + return Category.STRUCT; + } + + @Override + public String getTypeName() { + return typeInfo.getTypeName(); + } + + @Override + public List getAllStructFieldRefs() { + return fields; + } + + @Override + public Object 
getStructFieldData(final Object data, final StructField fieldRef) { + if (data == null) { + return null; + } + + if (data instanceof ArrayWritable) { + final ArrayWritable arr = (ArrayWritable) data; + return arr.get()[((StructFieldImpl) fieldRef).getIndex()]; + } + + throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName()); + } + + @Override + public StructField getStructFieldRef(final String name) { + return fieldsByName.get(name); + } + + @Override + public List getStructFieldsDataAsList(final Object data) { + if (data == null) { + return null; + } + + if (data instanceof ArrayWritable) { + final ArrayWritable arr = (ArrayWritable) data; + final Object[] arrWritable = arr.get(); + return new ArrayList(Arrays.asList(arrWritable)); + } + + throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName()); + } + + @Override + public Object create() { + final ArrayList list = new ArrayList(fields.size()); + for (int i = 0; i < fields.size(); ++i) { + list.add(null); + } + return list; + } + + @Override + public Object setStructFieldData(Object struct, StructField field, Object fieldValue) { + final ArrayList list = (ArrayList) struct; + list.set(((StructFieldImpl) field).getIndex(), fieldValue); + return list; + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final ArrayWritableObjectInspector other = (ArrayWritableObjectInspector) obj; + if (this.typeInfo != other.typeInfo && (this.typeInfo == null || !this.typeInfo.equals(other.typeInfo))) { + return false; + } + return true; + } + + @Override + public int hashCode() { + int hash = 5; + hash = 29 * hash + (this.typeInfo != null ? this.typeInfo.hashCode() : 0); + return hash; + } + + class StructFieldImpl implements StructField { + + private final String name; + private final ObjectInspector inspector; + private final int index; + + public StructFieldImpl(final String name, final ObjectInspector inspector, final int index) { + this.name = name; + this.inspector = inspector; + this.index = index; + } + + @Override + public String getFieldComment() { + return ""; + } + + @Override + public String getFieldName() { + return name; + } + + public int getIndex() { + return index; + } + + @Override + public ObjectInspector getFieldObjectInspector() { + return inspector; + } + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/DeepParquetHiveMapInspector.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/DeepParquetHiveMapInspector.java new file mode 100644 index 0000000..d38c641 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/DeepParquetHiveMapInspector.java @@ -0,0 +1,82 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.parquet.serde; + +import java.util.Map; + +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.Writable; + +/** + * The DeepParquetHiveMapInspector will inspect an ArrayWritable, considering it as a Hive map.
+ * It can also inspect a Map if Hive decides to inspect the result of an inspection.
+ * When trying to access elements from the map it will iterate over all keys, inspecting them and comparing them to the + * desired key. + * + */ +public class DeepParquetHiveMapInspector extends AbstractParquetMapInspector { + + public DeepParquetHiveMapInspector(final ObjectInspector keyInspector, final ObjectInspector valueInspector) { + super(keyInspector, valueInspector); + } + + @Override + public Object getMapValueElement(final Object data, final Object key) { + if (data == null || key == null) { + return null; + } + + if (data instanceof ArrayWritable) { + final Writable[] mapContainer = ((ArrayWritable) data).get(); + + if (mapContainer == null || mapContainer.length == 0) { + return null; + } + + final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get(); + + for (final Writable obj : mapArray) { + final ArrayWritable mapObj = (ArrayWritable) obj; + final Writable[] arr = mapObj.get(); + if (key.equals(arr[0]) || key.equals(((PrimitiveObjectInspector) keyInspector).getPrimitiveJavaObject(arr[0])) + || key.equals(((PrimitiveObjectInspector) keyInspector).getPrimitiveWritableObject(arr[0]))) { + return arr[1]; + } + } + + return null; + } + + if (data instanceof Map) { + final Map map = (Map) data; + + if (map.containsKey(key)) { + return map.get(key); + } + + for (final Map.Entry entry : map.entrySet()) { + if (key.equals(((PrimitiveObjectInspector) keyInspector).getPrimitiveJavaObject(entry.getKey())) + || key.equals(((PrimitiveObjectInspector) keyInspector).getPrimitiveWritableObject(entry.getKey()))) { + return entry.getValue(); + } + } + + return null; + } + + throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName()); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveArrayInspector.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveArrayInspector.java new file mode 100644 index 0000000..53ca31d --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveArrayInspector.java @@ -0,0 +1,185 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.serde; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.SettableListObjectInspector; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.Writable; + +/** + * The ParquetHiveArrayInspector will inspect an ArrayWritable, considering it as an Hive array.
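+ * The list data arrives wrapped in a one-element ArrayWritable whose single entry is the repeated "bag" group produced by HiveSchemaConverter, so the inspector always unwraps that first element before indexing.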
+ * It can also inspect a List if Hive decides to inspect the result of an inspection. + * + */ +public class ParquetHiveArrayInspector implements SettableListObjectInspector { + + ObjectInspector arrayElementInspector; + + public ParquetHiveArrayInspector(final ObjectInspector arrayElementInspector) { + this.arrayElementInspector = arrayElementInspector; + } + + @Override + public String getTypeName() { + return "array<" + arrayElementInspector.getTypeName() + ">"; + } + + @Override + public Category getCategory() { + return Category.LIST; + } + + @Override + public ObjectInspector getListElementObjectInspector() { + return arrayElementInspector; + } + + @Override + public Object getListElement(final Object data, final int index) { + if (data == null) { + return null; + } + + if (data instanceof ArrayWritable) { + final Writable[] listContainer = ((ArrayWritable) data).get(); + + if (listContainer == null || listContainer.length == 0) { + return null; + } + + final Writable subObj = listContainer[0]; + + if (subObj == null) { + return null; + } + + if (index >= 0 && index < ((ArrayWritable) subObj).get().length) { + return ((ArrayWritable) subObj).get()[index]; + } else { + return null; + } + } + + throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName()); + } + + @Override + public int getListLength(final Object data) { + if (data == null) { + return -1; + } + + if (data instanceof ArrayWritable) { + final Writable[] listContainer = ((ArrayWritable) data).get(); + + if (listContainer == null || listContainer.length == 0) { + return -1; + } + + final Writable subObj = listContainer[0]; + + if (subObj == null) { + return 0; + } + + return ((ArrayWritable) subObj).get().length; + } + + throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName()); + } + + @Override + public List getList(final Object data) { + if (data == null) { + return null; + } + + if (data instanceof ArrayWritable) { + final Writable[] listContainer = ((ArrayWritable) data).get(); + + if (listContainer == null || listContainer.length == 0) { + return null; + } + + final Writable subObj = listContainer[0]; + + if (subObj == null) { + return null; + } + + final Writable[] array = ((ArrayWritable) subObj).get(); + final List list = new ArrayList(); + + for (final Writable obj : array) { + list.add(obj); + } + + return list; + } + + throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName()); + } + + @Override + public Object create(final int size) { + final ArrayList result = new ArrayList(size); + for (int i = 0; i < size; ++i) { + result.add(null); + } + return result; + } + + @Override + public Object set(final Object list, final int index, final Object element) { + final ArrayList l = (ArrayList) list; + l.set(index, element); + return list; + } + + @Override + public Object resize(final Object list, final int newSize) { + final ArrayList l = (ArrayList) list; + l.ensureCapacity(newSize); + while (l.size() < newSize) { + l.add(null); + } + while (l.size() > newSize) { + l.remove(l.size() - 1); + } + return list; + } + + @Override + public boolean equals(final Object o) { + if (o == null || o.getClass() != getClass()) { + return false; + } else if (o == this) { + return true; + } else { + final ObjectInspector other = ((ParquetHiveArrayInspector) o).arrayElementInspector; + return other.equals(arrayElementInspector); + } + } + + @Override + public int hashCode() { + int hash = 3; + hash = 29 * hash + 
(this.arrayElementInspector != null ? this.arrayElementInspector.hashCode() : 0); + return hash; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java new file mode 100644 index 0000000..b689336 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java @@ -0,0 +1,274 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.serde; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.io.IOConstants; +import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable; +import org.apache.hadoop.hive.serde2.AbstractSerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeStats; +import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; + +import parquet.io.api.Binary; + +/** + * + * A ParquetHiveSerDe for Hive (with the 
deprecated package mapred) + * + */ +public class ParquetHiveSerDe extends AbstractSerDe { + + public static final Text MAP_KEY = new Text("key"); + public static final Text MAP_VALUE = new Text("value"); + public static final Text MAP = new Text("map"); + public static final Text ARRAY = new Text("bag"); + + private SerDeStats stats; + private ObjectInspector objInspector; + + private enum LAST_OPERATION { + SERIALIZE, + DESERIALIZE, + UNKNOWN + } + + private LAST_OPERATION status; + private long serializedSize; + private long deserializedSize; + + @Override + public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException { + + final TypeInfo rowTypeInfo; + final List columnNames; + final List columnTypes; + // Get column names and sort order + final String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS); + final String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES); + + if (columnNameProperty.length() == 0) { + columnNames = new ArrayList(); + } else { + columnNames = Arrays.asList(columnNameProperty.split(",")); + } + if (columnTypeProperty.length() == 0) { + columnTypes = new ArrayList(); + } else { + columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + } + if (columnNames.size() != columnTypes.size()) { + throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " + + "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + + columnTypes); + } + // Create row related objects + rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); + this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo); + + // Stats part + stats = new SerDeStats(); + serializedSize = 0; + deserializedSize = 0; + status = LAST_OPERATION.UNKNOWN; + } + + @Override + public Object deserialize(final Writable blob) throws SerDeException { + status = LAST_OPERATION.DESERIALIZE; + deserializedSize = 0; + if (blob instanceof ArrayWritable) { + deserializedSize = ((ArrayWritable) blob).get().length; + return blob; + } else { + return null; + } + } + + @Override + public ObjectInspector getObjectInspector() throws SerDeException { + return objInspector; + } + + @Override + public Class getSerializedClass() { + return ArrayWritable.class; + } + + @Override + public Writable serialize(final Object obj, final ObjectInspector objInspector) + throws SerDeException { + if (!objInspector.getCategory().equals(Category.STRUCT)) { + throw new SerDeException("Cannot serialize " + objInspector.getCategory() + ". 
Can only serialize a struct"); + } + final ArrayWritable serializeData = createStruct(obj, (StructObjectInspector) objInspector); + serializedSize = serializeData.get().length; + status = LAST_OPERATION.SERIALIZE; + return serializeData; + } + + private ArrayWritable createStruct(final Object obj, final StructObjectInspector inspector) + throws SerDeException { + final List fields = inspector.getAllStructFieldRefs(); + final Writable[] arr = new Writable[fields.size()]; + for (int i = 0; i < fields.size(); i++) { + final StructField field = fields.get(i); + final Object subObj = inspector.getStructFieldData(obj, field); + final ObjectInspector subInspector = field.getFieldObjectInspector(); + arr[i] = createObject(subObj, subInspector); + } + return new ArrayWritable(Writable.class, arr); + } + + private Writable createMap(final Object obj, final MapObjectInspector inspector) + throws SerDeException { + final Map sourceMap = inspector.getMap(obj); + final ObjectInspector keyInspector = inspector.getMapKeyObjectInspector(); + final ObjectInspector valueInspector = inspector.getMapValueObjectInspector(); + final List array = new ArrayList(); + + if (sourceMap != null) { + for (final Entry keyValue : sourceMap.entrySet()) { + final Writable key = createObject(keyValue.getKey(), keyInspector); + final Writable value = createObject(keyValue.getValue(), valueInspector); + if (key != null) { + Writable[] arr = new Writable[2]; + arr[0] = key; + arr[1] = value; + array.add(new ArrayWritable(Writable.class, arr)); + } + } + } + if (array.size() > 0) { + final ArrayWritable subArray = new ArrayWritable(ArrayWritable.class, + array.toArray(new ArrayWritable[array.size()])); + return new ArrayWritable(Writable.class, new Writable[] {subArray}); + } else { + return null; + } + } + + private ArrayWritable createArray(final Object obj, final ListObjectInspector inspector) + throws SerDeException { + final List sourceArray = inspector.getList(obj); + final ObjectInspector subInspector = inspector.getListElementObjectInspector(); + final List array = new ArrayList(); + if (sourceArray != null) { + for (final Object curObj : sourceArray) { + final Writable newObj = createObject(curObj, subInspector); + if (newObj != null) { + array.add(newObj); + } + } + } + if (array.size() > 0) { + final ArrayWritable subArray = new ArrayWritable(array.get(0).getClass(), + array.toArray(new Writable[array.size()])); + return new ArrayWritable(Writable.class, new Writable[] {subArray}); + } else { + return null; + } + } + + private Writable createPrimitive(final Object obj, final PrimitiveObjectInspector inspector) + throws SerDeException { + if (obj == null) { + return null; + } + switch (inspector.getPrimitiveCategory()) { + case VOID: + return null; + case BOOLEAN: + return new BooleanWritable(((BooleanObjectInspector) inspector).get(obj) ? 
Boolean.TRUE : Boolean.FALSE); + case BYTE: + return new ByteWritable((byte) ((ByteObjectInspector) inspector).get(obj)); + case DOUBLE: + return new DoubleWritable(((DoubleObjectInspector) inspector).get(obj)); + case FLOAT: + return new FloatWritable(((FloatObjectInspector) inspector).get(obj)); + case INT: + return new IntWritable(((IntObjectInspector) inspector).get(obj)); + case LONG: + return new LongWritable(((LongObjectInspector) inspector).get(obj)); + case SHORT: + return new ShortWritable((short) ((ShortObjectInspector) inspector).get(obj)); + case STRING: + return new BinaryWritable(Binary.fromString(((StringObjectInspector) inspector).getPrimitiveJavaObject(obj))); + default: + throw new SerDeException("Unknown primitive : " + inspector.getPrimitiveCategory()); + } + } + + private Writable createObject(final Object obj, final ObjectInspector inspector) throws SerDeException { + switch (inspector.getCategory()) { + case STRUCT: + return createStruct(obj, (StructObjectInspector) inspector); + case LIST: + return createArray(obj, (ListObjectInspector) inspector); + case MAP: + return createMap(obj, (MapObjectInspector) inspector); + case PRIMITIVE: + return createPrimitive(obj, (PrimitiveObjectInspector) inspector); + default: + throw new SerDeException("Unknown data type" + inspector.getCategory()); + } + } + + @Override + public SerDeStats getSerDeStats() { + // must be different + assert (status != LAST_OPERATION.UNKNOWN); + if (status == LAST_OPERATION.SERIALIZE) { + stats.setRawDataSize(serializedSize); + } else { + stats.setRawDataSize(deserializedSize); + } + return stats; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/StandardParquetHiveMapInspector.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/StandardParquetHiveMapInspector.java new file mode 100644 index 0000000..5aa1448 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/StandardParquetHiveMapInspector.java @@ -0,0 +1,60 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.serde; + +import java.util.Map; + +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.Writable; + +/** + * The StandardParquetHiveMapInspector will inspect an ArrayWritable, considering it as a Hive map.
+ * It can also inspect a Map if Hive decides to inspect the result of an inspection. + * + */ +public class StandardParquetHiveMapInspector extends AbstractParquetMapInspector { + + public StandardParquetHiveMapInspector(final ObjectInspector keyInspector, + final ObjectInspector valueInspector) { + super(keyInspector, valueInspector); + } + + @Override + public Object getMapValueElement(final Object data, final Object key) { + if (data == null || key == null) { + return null; + } + if (data instanceof ArrayWritable) { + final Writable[] mapContainer = ((ArrayWritable) data).get(); + + if (mapContainer == null || mapContainer.length == 0) { + return null; + } + final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get(); + for (final Writable obj : mapArray) { + final ArrayWritable mapObj = (ArrayWritable) obj; + final Writable[] arr = mapObj.get(); + if (key.equals(arr[0])) { + return arr[1]; + } + } + return null; + } + if (data instanceof Map) { + return ((Map) data).get(key); + } + throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName()); + } +} \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetByteInspector.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetByteInspector.java new file mode 100644 index 0000000..d5d1bf1 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetByteInspector.java @@ -0,0 +1,56 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.serde.primitive; + +import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableByteObjectInspector; +import org.apache.hadoop.io.IntWritable; + +/** + * The ParquetByteInspector can inspect both ByteWritables and IntWritables into bytes. + * + */ +public class ParquetByteInspector extends AbstractPrimitiveJavaObjectInspector implements SettableByteObjectInspector { + + ParquetByteInspector() { + super(TypeInfoFactory.byteTypeInfo); + } + + @Override + public Object getPrimitiveWritableObject(final Object o) { + return o == null ? null : new ByteWritable(get(o)); + } + + @Override + public Object create(final byte val) { + return new ByteWritable(val); + } + + @Override + public Object set(final Object o, final byte val) { + ((ByteWritable) o).set(val); + return o; + } + + @Override + public byte get(Object o) { + // Accept int writables and convert them. 
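+ // Hive tinyint columns are written as Parquet INT32 (see HiveSchemaConverter), so the read path hands back an IntWritable that must be narrowed to a byte here.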
+ if (o instanceof IntWritable) { + return (byte) ((IntWritable) o).get(); + } + return ((ByteWritable) o).get(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetPrimitiveInspectorFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetPrimitiveInspectorFactory.java new file mode 100644 index 0000000..79d88ce --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetPrimitiveInspectorFactory.java @@ -0,0 +1,29 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.serde.primitive; + +/** + * The ParquetPrimitiveInspectorFactory allows us to be sure that the same object is inspected by the same inspector. + * + */ +public class ParquetPrimitiveInspectorFactory { + + public static final ParquetByteInspector parquetByteInspector = new ParquetByteInspector(); + public static final ParquetShortInspector parquetShortInspector = new ParquetShortInspector(); + public static final ParquetStringInspector parquetStringInspector = new ParquetStringInspector(); + + private ParquetPrimitiveInspectorFactory() { + // prevent instantiation + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetShortInspector.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetShortInspector.java new file mode 100644 index 0000000..94f2813 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetShortInspector.java @@ -0,0 +1,56 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.serde.primitive; + +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableShortObjectInspector; +import org.apache.hadoop.io.IntWritable; + +/** + * The ParquetShortInspector can inspect both ShortWritables and IntWritables into shorts. + * + */ +public class ParquetShortInspector extends AbstractPrimitiveJavaObjectInspector implements SettableShortObjectInspector { + + ParquetShortInspector() { + super(TypeInfoFactory.shortTypeInfo); + } + + @Override + public Object getPrimitiveWritableObject(final Object o) { + return o == null ? 
null : new ShortWritable(get(o)); + } + + @Override + public Object create(final short val) { + return new ShortWritable(val); + } + + @Override + public Object set(final Object o, final short val) { + ((ShortWritable) o).set(val); + return o; + } + + @Override + public short get(Object o) { + // Accept int writables and convert them. + if (o instanceof IntWritable) { + return (short) ((IntWritable) o).get(); + } + return ((ShortWritable) o).get(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java new file mode 100644 index 0000000..03e8369 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java @@ -0,0 +1,98 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.serde.primitive; + +import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableStringObjectInspector; +import org.apache.hadoop.io.Text; + +import parquet.io.api.Binary; + +/** + * The ParquetStringInspector inspects a BinaryWritable to give a Text or String. + * + */ +public class ParquetStringInspector extends AbstractPrimitiveJavaObjectInspector implements SettableStringObjectInspector { + + ParquetStringInspector() { + super(TypeInfoFactory.stringTypeInfo); + } + + @Override + public Text getPrimitiveWritableObject(final Object o) { + if (o == null) { + return null; + } + + if (o instanceof BinaryWritable) { + return new Text(((BinaryWritable) o).getBytes()); + } + + if (o instanceof Text) { + return (Text) o; + } + + if (o instanceof String) { + return new Text((String) o); + } + + throw new UnsupportedOperationException("Cannot inspect " + o.getClass().getCanonicalName()); + } + + @Override + public String getPrimitiveJavaObject(final Object o) { + if (o == null) { + return null; + } + + if (o instanceof BinaryWritable) { + return ((BinaryWritable) o).getString(); + } + + if (o instanceof Text) { + return ((Text) o).toString(); + } + + if (o instanceof String) { + return (String) o; + } + + throw new UnsupportedOperationException("Cannot inspect " + o.getClass().getCanonicalName()); + } + + @Override + public Object set(final Object o, final Text text) { + return new BinaryWritable(text == null ? null : Binary.fromByteArray(text.getBytes())); + } + + @Override + public Object set(final Object o, final String string) { + return new BinaryWritable(string == null ? 
null : Binary.fromString(string)); + } + + @Override + public Object create(final Text text) { + if (text == null) { + return null; + } + return text.toString(); + } + + @Override + public Object create(final String string) { + return string; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/writable/BigDecimalWritable.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/writable/BigDecimalWritable.java new file mode 100644 index 0000000..c5d6394 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/writable/BigDecimalWritable.java @@ -0,0 +1,143 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.writable; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.math.BigDecimal; +import java.math.BigInteger; + +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.io.WritableUtils; + +/** + * This file is taken from a patch to hive 0.11 + * Issue : https://issues.apache.org/jira/browse/HIVE-2693 + * + */ +public class BigDecimalWritable implements WritableComparable { + + private byte[] internalStorage = new byte[0]; + private int scale; + + private final VInt vInt = new VInt(); // reusable integer + + public BigDecimalWritable() { + } + + public BigDecimalWritable(final byte[] bytes, final int scale) { + set(bytes, scale); + } + + public BigDecimalWritable(final BigDecimalWritable writable) { + set(writable.getBigDecimal()); + } + + public BigDecimalWritable(final BigDecimal value) { + set(value); + } + + public void set(BigDecimal value) { + value = value.stripTrailingZeros(); + if (value.compareTo(BigDecimal.ZERO) == 0) { + // Special case for 0, because java doesn't strip zeros correctly on + // that number. 
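+      // (On older JDKs, stripTrailingZeros() applied to a zero value may keep a non-zero scale, so normalize to BigDecimal.ZERO explicitly.)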
+ value = BigDecimal.ZERO; + } + set(value.unscaledValue().toByteArray(), value.scale()); + } + + public void set(final BigDecimalWritable writable) { + set(writable.getBigDecimal()); + } + + public void set(final byte[] bytes, final int scale) { + this.internalStorage = bytes; + this.scale = scale; + } + + public void setFromBytes(final byte[] bytes, int offset, final int length) { + LazyBinaryUtils.readVInt(bytes, offset, vInt); + scale = vInt.value; + offset += vInt.length; + LazyBinaryUtils.readVInt(bytes, offset, vInt); + offset += vInt.length; + if (internalStorage.length != vInt.value) { + internalStorage = new byte[vInt.value]; + } + System.arraycopy(bytes, offset, internalStorage, 0, vInt.value); + } + + public BigDecimal getBigDecimal() { + return new BigDecimal(new BigInteger(internalStorage), scale); + } + + @Override + public void readFields(final DataInput in) throws IOException { + scale = WritableUtils.readVInt(in); + final int byteArrayLen = WritableUtils.readVInt(in); + if (internalStorage.length != byteArrayLen) { + internalStorage = new byte[byteArrayLen]; + } + in.readFully(internalStorage); + } + + @Override + public void write(final DataOutput out) throws IOException { + WritableUtils.writeVInt(out, scale); + WritableUtils.writeVInt(out, internalStorage.length); + out.write(internalStorage); + } + + @Override + public int compareTo(final BigDecimalWritable that) { + return getBigDecimal().compareTo(that.getBigDecimal()); + } + + public void writeToByteStream(final Output byteStream) { + LazyBinaryUtils.writeVInt(byteStream, scale); + LazyBinaryUtils.writeVInt(byteStream, internalStorage.length); + byteStream.write(internalStorage, 0, internalStorage.length); + } + + @Override + public String toString() { + return getBigDecimal().toString(); + } + + @Override + public boolean equals(final Object other) { + if (other == null || !(other instanceof BigDecimalWritable)) { + return false; + } + final BigDecimalWritable bdw = (BigDecimalWritable) other; + + // 'equals' and 'compareTo' are not compatible with BigDecimals. We want + // compareTo which returns true iff the numbers are equal (e.g.: 3.14 is + // the same as 3.140). 'Equals' returns true iff equal and the same + // scale + // is set in the decimals (e.g.: 3.14 is not the same as 3.140) + return getBigDecimal().compareTo(bdw.getBigDecimal()) == 0; + } + + @Override + public int hashCode() { + return getBigDecimal().hashCode(); + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/writable/BinaryWritable.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/writable/BinaryWritable.java new file mode 100644 index 0000000..11ab576 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/writable/BinaryWritable.java @@ -0,0 +1,93 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.parquet.writable; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.Writable; + +import parquet.io.api.Binary; + +/** + * + * A Wrapper to support constructor with Binary and String + * + * TODO : remove it, and call BytesWritable with the getBytes() in HIVE-6366 + * + */ +public class BinaryWritable implements Writable { + + private Binary binary; + + public BinaryWritable(final Binary binary) { + this.binary = binary; + } + + public Binary getBinary() { + return binary; + } + + public byte[] getBytes() { + return binary.getBytes(); + } + + public String getString() { + return binary.toStringUsingUTF8(); + } + + @Override + public void readFields(DataInput input) throws IOException { + byte[] bytes = new byte[input.readInt()]; + input.readFully(bytes); + binary = Binary.fromByteArray(bytes); + } + + @Override + public void write(DataOutput output) throws IOException { + output.writeInt(binary.length()); + binary.writeTo(output); + } + + @Override + public int hashCode() { + return binary == null ? 0 : binary.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof BinaryWritable) { + final BinaryWritable other = (BinaryWritable)obj; + return binary.equals(other.binary); + } + return false; + } + + public static class DicBinaryWritable extends BinaryWritable { + + private final String string; + + public DicBinaryWritable(Binary binary, String string) { + super(binary); + this.string = string; + } + + @Override + public String getString() { + return string; + } + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java new file mode 100644 index 0000000..060b1b7 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java @@ -0,0 +1,61 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.parquet.write; + +import java.util.HashMap; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.ArrayWritable; + +import parquet.hadoop.api.WriteSupport; +import parquet.io.api.RecordConsumer; +import parquet.schema.MessageType; +import parquet.schema.MessageTypeParser; + +/** + * + * DataWritableWriteSupport is a WriteSupport for the DataWritableWriter + * + */ +public class DataWritableWriteSupport extends WriteSupport { + + public static final String PARQUET_HIVE_SCHEMA = "parquet.hive.schema"; + + private DataWritableWriter writer; + private MessageType schema; + + public static void setSchema(final MessageType schema, final Configuration configuration) { + configuration.set(PARQUET_HIVE_SCHEMA, schema.toString()); + } + + public static MessageType getSchema(final Configuration configuration) { + return MessageTypeParser.parseMessageType(configuration.get(PARQUET_HIVE_SCHEMA)); + } + + @Override + public WriteContext init(final Configuration configuration) { + schema = getSchema(configuration); + return new WriteContext(schema, new HashMap()); + } + + @Override + public void prepareForWrite(final RecordConsumer recordConsumer) { + writer = new DataWritableWriter(recordConsumer, schema); + } + + @Override + public void write(final ArrayWritable record) { + writer.write(record); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java new file mode 100644 index 0000000..a98f6be --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java @@ -0,0 +1,154 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.parquet.write; + +import org.apache.hadoop.hive.ql.io.parquet.writable.BigDecimalWritable; +import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable; +import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Writable; + +import parquet.io.ParquetEncodingException; +import parquet.io.api.RecordConsumer; +import parquet.schema.GroupType; +import parquet.schema.Type; + +/** + * + * DataWritableWriter is a writer, + * that will read an ArrayWritable and give the data to parquet + * with the expected schema + * + */ +public class DataWritableWriter { + + private final RecordConsumer recordConsumer; + private final GroupType schema; + + public DataWritableWriter(final RecordConsumer recordConsumer, final GroupType schema) { + this.recordConsumer = recordConsumer; + this.schema = schema; + } + + public void write(final ArrayWritable arr) { + if (arr == null) { + return; + } + recordConsumer.startMessage(); + writeData(arr, schema); + recordConsumer.endMessage(); + } + + private void writeData(final ArrayWritable arr, final GroupType type) { + if (arr == null) { + return; + } + final int fieldCount = type.getFieldCount(); + Writable[] values = arr.get(); + for (int field = 0; field < fieldCount; ++field) { + final Type fieldType = type.getType(field); + final String fieldName = fieldType.getName(); + final Writable value = values[field]; + if (value == null) { + continue; + } + recordConsumer.startField(fieldName, field); + + if (fieldType.isPrimitive()) { + writePrimitive(value); + } else { + recordConsumer.startGroup(); + if (value instanceof ArrayWritable) { + if (fieldType.asGroupType().getRepetition().equals(Type.Repetition.REPEATED)) { + writeArray((ArrayWritable) value, fieldType.asGroupType()); + } else { + writeData((ArrayWritable) value, fieldType.asGroupType()); + } + } else if (value != null) { + throw new ParquetEncodingException("This should be an ArrayWritable or MapWritable: " + value); + } + + recordConsumer.endGroup(); + } + + recordConsumer.endField(fieldName, field); + } + } + + private void writeArray(final ArrayWritable array, final GroupType type) { + if (array == null) { + return; + } + final Writable[] subValues = array.get(); + final int fieldCount = type.getFieldCount(); + for (int field = 0; field < fieldCount; ++field) { + final Type subType = type.getType(field); + recordConsumer.startField(subType.getName(), field); + for (int i = 0; i < subValues.length; ++i) { + final Writable subValue = subValues[i]; + if (subValue != null) { + if (subType.isPrimitive()) { + if (subValue instanceof ArrayWritable) { + writePrimitive(((ArrayWritable) subValue).get()[field]);// 0 ? 
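+              // For a primitive element wrapped in an ArrayWritable, only the value at this field position is emitted.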
+ } else { + writePrimitive(subValue); + } + } else { + if (!(subValue instanceof ArrayWritable)) { + throw new RuntimeException("This should be a ArrayWritable: " + subValue); + } else { + recordConsumer.startGroup(); + writeData((ArrayWritable) subValue, subType.asGroupType()); + recordConsumer.endGroup(); + } + } + } + } + recordConsumer.endField(subType.getName(), field); + } + } + + private void writePrimitive(final Writable value) { + if (value == null) { + return; + } + if (value instanceof DoubleWritable) { + recordConsumer.addDouble(((DoubleWritable) value).get()); + } else if (value instanceof BooleanWritable) { + recordConsumer.addBoolean(((BooleanWritable) value).get()); + } else if (value instanceof FloatWritable) { + recordConsumer.addFloat(((FloatWritable) value).get()); + } else if (value instanceof IntWritable) { + recordConsumer.addInteger(((IntWritable) value).get()); + } else if (value instanceof LongWritable) { + recordConsumer.addLong(((LongWritable) value).get()); + } else if (value instanceof ShortWritable) { + recordConsumer.addInteger(((ShortWritable) value).get()); + } else if (value instanceof ByteWritable) { + recordConsumer.addInteger(((ByteWritable) value).get()); + } else if (value instanceof BigDecimalWritable) { + throw new UnsupportedOperationException("BigDecimal writing not implemented"); + } else if (value instanceof BinaryWritable) { + recordConsumer.addBinary(((BinaryWritable) value).getBinary()); + } else { + throw new IllegalArgumentException("Unknown value type: " + value + " " + value.getClass()); + } + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java new file mode 100644 index 0000000..cd603c2 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java @@ -0,0 +1,93 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.parquet.write; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordWriter; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.util.Progressable; +import org.apache.hadoop.hive.ql.io.FSRecordWriter; + +import parquet.hadoop.ParquetOutputFormat; +import parquet.hadoop.util.ContextUtil; + +public class ParquetRecordWriterWrapper implements RecordWriter, + FSRecordWriter { + + public static final Log LOG = LogFactory.getLog(ParquetRecordWriterWrapper.class); + + private final org.apache.hadoop.mapreduce.RecordWriter realWriter; + private final TaskAttemptContext taskContext; + + public ParquetRecordWriterWrapper( + final OutputFormat realOutputFormat, + final JobConf jobConf, + final String name, + final Progressable progress) throws IOException { + try { + // create a TaskInputOutputContext + TaskAttemptID taskAttemptID = TaskAttemptID.forName(jobConf.get("mapred.task.id")); + if (taskAttemptID == null) { + taskAttemptID = new TaskAttemptID(); + } + taskContext = ContextUtil.newTaskAttemptContext(jobConf, taskAttemptID); + + LOG.info("creating real writer to write at " + name); + realWriter = (org.apache.hadoop.mapreduce.RecordWriter) + ((ParquetOutputFormat) realOutputFormat).getRecordWriter(taskContext, new Path(name)); + LOG.info("real writer: " + realWriter); + } catch (final InterruptedException e) { + throw new IOException(e); + } + } + + @Override + public void close(final Reporter reporter) throws IOException { + try { + realWriter.close(taskContext); + } catch (final InterruptedException e) { + throw new IOException(e); + } + } + + @Override + public void write(final Void key, final ArrayWritable value) throws IOException { + try { + realWriter.write(key, value); + } catch (final InterruptedException e) { + throw new IOException(e); + } + } + + @Override + public void close(final boolean abort) throws IOException { + close(null); + } + + @Override + public void write(final Writable w) throws IOException { + write(null, (ArrayWritable) w); + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java index 2bc7e86..0637d46 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java @@ -20,6 +20,7 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -131,6 +132,12 @@ public boolean equals(Object o) { && this.typeInfo.getTypeName().equals(c.getTypeInfo().getTypeName()); } + public static Collection removeVirtualColumns(final Collection columns) { + for(VirtualColumn vcol : VIRTUAL_COLUMNS) { + columns.remove(vcol.getName()); + } + return columns; + } public static StructObjectInspector getVCSObjectInspector(List vcs) { List names = new ArrayList(vcs.size()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index 
45bd8a6..0fdeb32 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -1219,7 +1219,7 @@ public static void createMRWorkForMergingFiles (FileSinkOperator fsInput, // 2. Constructing a conditional task consisting of a move task and a map reduce task // MoveWork dummyMv = new MoveWork(null, null, null, - new LoadFileDesc(fsInputDesc.getFinalDirName(), finalName, true, null, null), false); + new LoadFileDesc(fsInputDesc.getFinalDirName(), finalName, true, null, null), null, false); MapWork cplan; Serializable work; @@ -1369,6 +1369,8 @@ public static void addStatsTask(FileSinkOperator nd, MoveTask mvTask, statsWork = new StatsWork(mvWork.getLoadTableWork()); } else if (mvWork.getLoadFileWork() != null) { statsWork = new StatsWork(mvWork.getLoadFileWork()); + } else if (mvWork.getCompleteBulkLoadWork() != null) { + statsWork = new StatsWork(mvWork.getCompleteBulkLoadWork()); } assert statsWork != null : "Error when genereting StatsTask"; @@ -1579,6 +1581,8 @@ public static boolean isSkewedStoredAsDirs(FileSinkDesc fsInputDesc) { srcDir = mvWork.getLoadFileWork().getSourcePath(); } else if (mvWork.getLoadTableWork() != null) { srcDir = mvWork.getLoadTableWork().getSourcePath(); + } else if (mvWork.getCompleteBulkLoadWork() != null) { + srcDir = mvWork.getCompleteBulkLoadWork().getSourcePath(); } if ((srcDir != null) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index 13d0a56..db9fa74 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -59,6 +59,9 @@ import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcSerde; +import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat; +import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat; +import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -138,6 +141,10 @@ protected static final String ORCFILE_SERDE = OrcSerde.class .getName(); + protected static final String PARQUETFILE_INPUT = MapredParquetInputFormat.class.getName(); + protected static final String PARQUETFILE_OUTPUT = MapredParquetOutputFormat.class.getName(); + protected static final String PARQUETFILE_SERDE = ParquetHiveSerDe.class.getName(); + class RowFormatParams { String fieldDelim = null; String fieldEscape = null; @@ -225,6 +232,12 @@ protected boolean fillStorageFormat(ASTNode child, AnalyzeCreateCommonVars share shared.serde = ORCFILE_SERDE; storageFormat = true; break; + case HiveParser.TOK_TBLPARQUETFILE: + inputFormat = PARQUETFILE_INPUT; + outputFormat = PARQUETFILE_OUTPUT; + shared.serde = PARQUETFILE_SERDE; + storageFormat = true; + break; case HiveParser.TOK_TABLEFILEFORMAT: inputFormat = unescapeSQLString(child.getChild(0).getText()); outputFormat = unescapeSQLString(child.getChild(1).getText()); @@ -256,6 +269,10 @@ protected void fillDefaultStorageFormat(AnalyzeCreateCommonVars shared) { inputFormat = ORCFILE_INPUT; outputFormat = ORCFILE_OUTPUT; shared.serde = ORCFILE_SERDE; + } else if ("PARQUET".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT))) { + 
inputFormat = PARQUETFILE_INPUT; + outputFormat = PARQUETFILE_OUTPUT; + shared.serde = PARQUETFILE_SERDE; } else { inputFormat = TEXTFILE_INPUT; outputFormat = TEXTFILE_OUTPUT; @@ -947,7 +964,7 @@ public void setColumnAccessInfo(ColumnAccessInfo columnAccessInfo) { * @return true if the specification is prefix; never returns false, but throws * @throws HiveException */ - final public boolean isValidPrefixSpec(Table tTable, Map spec) + public final boolean isValidPrefixSpec(Table tTable, Map spec) throws HiveException { // TODO - types need to be checked. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index 9f15609..14f38c8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -459,11 +459,33 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { case HiveParser.TOK_EXCHANGEPARTITION: analyzeExchangePartition(ast); break; + case HiveParser.TOK_SHOW_SET_ROLE: + analyzeSetShowRole(ast); + break; default: throw new SemanticException("Unsupported command."); } } + private void analyzeSetShowRole(ASTNode ast) throws SemanticException { + switch (ast.getChildCount()) { + case 0: + ctx.setResFile(ctx.getLocalTmpPath()); + rootTasks.add(hiveAuthorizationTaskFactory.createShowCurrentRoleTask( + getInputs(), getOutputs(), ctx.getResFile())); + setFetchTask(createFetchTask(RoleDDLDesc.getRoleNameSchema())); + break; + case 1: + rootTasks.add(hiveAuthorizationTaskFactory.createSetRoleTask( + BaseSemanticAnalyzer.unescapeIdentifier(ast.getChild(0).getText()), + getInputs(), getOutputs())); + break; + default: + throw new SemanticException("Internal error. ASTNode expected to have 0 or 1 child. " + + ast.dump()); + } + } + private void analyzeGrantRevokeRole(boolean grant, ASTNode ast) throws SemanticException { Task task; if(grant) { @@ -864,7 +886,7 @@ private void analyzeTruncateTable(ASTNode ast) throws SemanticException { LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc, partSpec == null ? new HashMap() : partSpec); ltd.setLbCtx(lbCtx); - Task moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), + Task moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, null, false), conf); truncateTask.addDependentTask(moveTsk); @@ -940,7 +962,7 @@ private void analyzeCreateIndex(ASTNode ast) throws SemanticException { break; case HiveParser.TOK_CREATEINDEX_INDEXTBLNAME: ASTNode ch = (ASTNode) child.getChild(0); - indexTableName = getUnescapedName((ASTNode) ch); + indexTableName = getUnescapedName(ch); break; case HiveParser.TOK_DEFERRED_REBUILDINDEX: deferredRebuild = true; @@ -1479,7 +1501,7 @@ private void analyzeAlterTablePartMergeFiles(ASTNode tablePartAST, ASTNode ast, LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc, partSpec == null ? 
new HashMap() : partSpec); ltd.setLbCtx(lbCtx); - Task moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), + Task moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, null, false), conf); mergeTask.addDependentTask(moveTsk); @@ -2120,7 +2142,7 @@ private void analyzeShowLocks(ASTNode ast) throws SemanticException { for (int i = 0; i < ast.getChildCount(); i++) { ASTNode child = (ASTNode) ast.getChild(i); if (child.getType() == HiveParser.TOK_TABTYPE) { - ASTNode tableTypeExpr = (ASTNode) child; + ASTNode tableTypeExpr = child; tableName = QualifiedNameUtil.getFullyQualifiedName((ASTNode) tableTypeExpr.getChild(0)); // get partition metadata if partition specified @@ -2345,7 +2367,7 @@ private void analyzeAlterTableRenameCol(ASTNode ast) throws SemanticException { private void analyzeAlterTableRenamePart(ASTNode ast, String tblName, HashMap oldPartSpec) throws SemanticException { - Map newPartSpec = extractPartitionSpecs((ASTNode) ast.getChild(0)); + Map newPartSpec = extractPartitionSpecs(ast.getChild(0)); if (newPartSpec == null) { throw new SemanticException("RENAME PARTITION Missing Destination" + ast); } @@ -2514,7 +2536,7 @@ private void analyzeAlterTableAddParts(CommonTree ast, boolean expectView) } currentPart = getPartSpec(child); validatePartitionValues(currentPart); // validate reserved values - validatePartSpec(tab, currentPart, (ASTNode)child, conf, true); + validatePartSpec(tab, currentPart, child, conf, true); break; case HiveParser.TOK_PARTITIONLOCATION: // if location specified, set in partition diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index f83c15d..aea9c1c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -153,6 +153,7 @@ KW_SEQUENCEFILE: 'SEQUENCEFILE'; KW_TEXTFILE: 'TEXTFILE'; KW_RCFILE: 'RCFILE'; KW_ORCFILE: 'ORC'; +KW_PARQUETFILE: 'PARQUET'; KW_INPUTFORMAT: 'INPUTFORMAT'; KW_OUTPUTFORMAT: 'OUTPUTFORMAT'; KW_INPUTDRIVER: 'INPUTDRIVER'; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 7e69912..3f91ec1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -183,6 +183,7 @@ TOK_TABLEROWFORMATMAPKEYS; TOK_TABLEROWFORMATLINES; TOK_TABLEROWFORMATNULL; TOK_TBLORCFILE; +TOK_TBLPARQUETFILE; TOK_TBLSEQUENCEFILE; TOK_TBLTEXTFILE; TOK_TBLRCFILE; @@ -281,6 +282,7 @@ TOK_GRANT_ROLE; TOK_REVOKE_ROLE; TOK_SHOW_ROLE_GRANT; TOK_SHOW_ROLES; +TOK_SHOW_SET_ROLE; TOK_SHOWINDEXES; TOK_SHOWDBLOCKS; TOK_INDEXCOMMENT; @@ -671,6 +673,8 @@ ddlStatement | showRoles | grantRole | revokeRole + | setRole + | showCurrentRole ; ifExists @@ -1223,6 +1227,7 @@ fileFormat | KW_TEXTFILE -> ^(TOK_TBLTEXTFILE) | KW_RCFILE -> ^(TOK_TBLRCFILE) | KW_ORCFILE -> ^(TOK_TBLORCFILE) + | KW_PARQUETFILE -> ^(TOK_TBLPARQUETFILE) | KW_INPUTFORMAT inFmt=StringLiteral KW_OUTPUTFORMAT outFmt=StringLiteral (KW_INPUTDRIVER inDriver=StringLiteral KW_OUTPUTDRIVER outDriver=StringLiteral)? -> ^(TOK_TABLEFILEFORMAT $inFmt $outFmt $inDriver? $outDriver?) 
| genericSpec=identifier -> ^(TOK_FILEFORMAT_GENERIC $genericSpec) @@ -1376,6 +1381,20 @@ showRoles -> ^(TOK_SHOW_ROLES) ; +showCurrentRole +@init {pushMsg("show current role", state);} +@after {popMsg(state);} + : KW_SHOW KW_CURRENT KW_ROLES + -> ^(TOK_SHOW_SET_ROLE) + ; + +setRole +@init {pushMsg("set role", state);} +@after {popMsg(state);} + : KW_SET KW_ROLE roleName=identifier + -> ^(TOK_SHOW_SET_ROLE $roleName) + ; + showGrants @init {pushMsg("show grants", state);} @after {popMsg(state);} @@ -1704,6 +1723,7 @@ tableFileFormat | KW_STORED KW_AS KW_TEXTFILE -> TOK_TBLTEXTFILE | KW_STORED KW_AS KW_RCFILE -> TOK_TBLRCFILE | KW_STORED KW_AS KW_ORCFILE -> TOK_TBLORCFILE + | KW_STORED KW_AS KW_PARQUETFILE -> TOK_TBLPARQUETFILE | KW_STORED KW_AS KW_INPUTFORMAT inFmt=StringLiteral KW_OUTPUTFORMAT outFmt=StringLiteral (KW_INPUTDRIVER inDriver=StringLiteral KW_OUTPUTDRIVER outDriver=StringLiteral)? -> ^(TOK_TABLEFILEFORMAT $inFmt $outFmt $inDriver? $outDriver?) | KW_STORED KW_BY storageHandler=StringLiteral diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 538b2b0..8eaffba 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -535,5 +535,5 @@ identifier nonReserved : - KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_SEQUENCEFILE | KW_TEXTFILE | KW_RCFILE | KW_ORCFILE | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_LOGICAL | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER 
| KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_ROLES | KW_INNER | KW_DEFINED | KW_ADMIN + KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_SEQUENCEFILE | KW_TEXTFILE | KW_RCFILE | KW_ORCFILE | KW_PARQUETFILE | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_LOGICAL | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_ROLES | KW_INNER | KW_DEFINED | KW_ADMIN ; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index ceb4c8a..044abcf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -290,7 +290,7 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { Utilities.getTableDesc(table), new TreeMap(), false); Task loadTableTask = TaskFactory.get(new MoveWork(getInputs(), - getOutputs(), loadTableWork, null, false), conf); + getOutputs(), loadTableWork, 
null, null, false), conf); copyTask.addDependentTask(loadTableTask); rootTasks.add(copyTask); return loadTableTask; @@ -338,7 +338,7 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { partSpec.getPartSpec(), true); loadTableWork.setInheritTableSpecs(false); Task loadPartTask = TaskFactory.get(new MoveWork( - getInputs(), getOutputs(), loadTableWork, null, false), + getInputs(), getOutputs(), loadTableWork, null, null, false), conf); copyTask.addDependentTask(loadPartTask); addPartTask.addDependentTask(loadPartTask); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index a22a15f..29e33f7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -261,7 +261,7 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite); Task childTask = TaskFactory.get(new MoveWork(getInputs(), - getOutputs(), loadTableWork, null, true), conf); + getOutputs(), loadTableWork, null, null, true), conf); if (rTask != null) { rTask.addDependentTask(childTask); } else { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java index 76f5a31..e009f5d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java @@ -124,8 +124,8 @@ public ParseContext getParseContext(ParseContext pCtx, List nameToSplitSample; private List loadTableWork; private List loadFileWork; + private List completeBulkLoadWork; private Context ctx; private HiveConf conf; private HashMap idToTableNameMap; @@ -170,7 +172,7 @@ public ParseContext( HashMap topToTable, HashMap> topToProps, Map fsopToTable, - List loadTableWork, List loadFileWork, + List loadTableWork, List loadFileWork, List completeBulkLoadWork, Context ctx, HashMap idToTableNameMap, int destTableId, UnionProcContext uCtx, List> listMapJoinOpsNoReducer, Map> groupOpToInputTables, @@ -195,6 +197,7 @@ public ParseContext( this.topToProps = topToProps; this.loadFileWork = loadFileWork; this.loadTableWork = loadTableWork; + this.completeBulkLoadWork = completeBulkLoadWork; this.opParseCtx = opParseCtx; this.topOps = topOps; this.topSelOps = topSelOps; @@ -451,6 +454,14 @@ public void setLoadFileWork(List loadFileWork) { this.loadFileWork = loadFileWork; } + public List getCompleteBulkLoadWork() { + return completeBulkLoadWork; + } + + public void setCompleteBulkLoadWork(List completeBulkLoadWork) { + this.completeBulkLoadWork = completeBulkLoadWork; + } + public HashMap getIdToTableNameMap() { return idToTableNameMap; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 80409ac..136fda4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -135,6 +135,7 @@ import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc; import org.apache.hadoop.hive.ql.plan.ForwardDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.HBaseCompleteBulkLoadDesc; import org.apache.hadoop.hive.ql.plan.HiveOperation; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import 
org.apache.hadoop.hive.ql.plan.JoinDesc; @@ -204,6 +205,7 @@ private LinkedHashMap, OpParseContext> opParseCtx; private List loadTableWork; private List loadFileWork; + private List completeBulkLoadWork; private Map joinContext; private Map smbMapJoinContext; private final HashMap topToTable; @@ -276,6 +278,7 @@ public SemanticAnalyzer(HiveConf conf) throws SemanticException { topSelOps = new HashMap>(); loadTableWork = new ArrayList(); loadFileWork = new ArrayList(); + completeBulkLoadWork = new ArrayList(); opParseCtx = new LinkedHashMap, OpParseContext>(); joinContext = new HashMap(); smbMapJoinContext = new HashMap(); @@ -303,6 +306,7 @@ protected void reset() { super.reset(); loadTableWork.clear(); loadFileWork.clear(); + completeBulkLoadWork.clear(); topOps.clear(); topSelOps.clear(); destTableId = 1; @@ -327,6 +331,7 @@ public void initParseCtx(ParseContext pctx) { opParseCtx = pctx.getOpParseCtx(); loadTableWork = pctx.getLoadTableWork(); loadFileWork = pctx.getLoadFileWork(); + completeBulkLoadWork = pctx.getCompleteBulkLoadWork(); joinContext = pctx.getJoinContext(); smbMapJoinContext = pctx.getSmbMapJoinContext(); ctx = pctx.getContext(); @@ -344,8 +349,8 @@ public void initParseCtx(ParseContext pctx) { public ParseContext getParseContext() { return new ParseContext(conf, qb, ast, opToPartPruner, opToPartList, topOps, topSelOps, opParseCtx, joinContext, smbMapJoinContext, topToTable, topToTableProps, - fsopToTable, loadTableWork, - loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, + fsopToTable, loadTableWork, loadFileWork, completeBulkLoadWork, + ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, @@ -5399,6 +5404,22 @@ private boolean checkHoldDDLTime(QB qb) { return false; } + /** + * Return true when {@code table} is registered with the HBaseStorageHandler, false otherwise. + */ + private boolean isHBaseTable(Table table) { + return table.getStorageHandler().getClass().getSimpleName().equals("HBaseStorageHandler"); + } + + /** + * Return true when HBase bulkload is enabled, false otherwise. + * + * Logic duplicated from {@code HBaseStorageHandler#isHBaseBulkLoad}. + */ + private boolean isHBaseBulkload(HiveConf conf) { + return conf.getBoolean("hive.hbase.bulkload", false); + } + @SuppressWarnings("nls") private Operator genFileSinkPlan(String dest, QB qb, Operator input) throws SemanticException { @@ -5417,6 +5438,7 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input) SortBucketRSCtx rsCtx = new SortBucketRSCtx(); DynamicPartitionCtx dpCtx = null; LoadTableDesc ltd = null; + HBaseCompleteBulkLoadDesc cbld = null; boolean holdDDLTime = checkHoldDDLTime(qb); ListBucketingCtx lbCtx = null; @@ -5474,15 +5496,15 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input) } boolean isNonNativeTable = dest_tab.isNonNative(); - if (isNonNativeTable) { - queryTmpdir = dest_path; - } else { - // if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/.. + if (!isNonNativeTable || isHBaseTable(dest_tab) && isHBaseBulkload(conf)) { + // if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/.. // to final /user/hive/warehouse/ will fail later, so instead pick tmp dir - // on same namespace as tbl dir. - queryTmpdir = dest_path.toUri().getScheme().equals("viewfs") ? 
- ctx.getExtTmpPathRelTo(dest_path.getParent().toUri()) : + // on same namespace as tbl dir. + queryTmpdir = dest_path.toUri().getScheme().equals("viewfs") ? + ctx.getExtTmpPathRelTo(dest_path.getParent().toUri()) : ctx.getExternalTmpPath(dest_path.toUri()); + } else { + queryTmpdir = dest_path; } if (dpCtx != null) { // set the root of the temporay path where dynamic partition columns will populate @@ -5517,6 +5539,16 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input) loadTableWork.add(ltd); } + // check hbase bulkload scenario + if (isHBaseTable(dest_tab) && isHBaseBulkload(conf)) { + /* + * In order for the MoveTask to be added to the plan, cbld.getSourcePath() must match + * finalDirName in GenMapRedUtils#findMoveTask + */ + cbld = new HBaseCompleteBulkLoadDesc(queryTmpdir, table_desc); + completeBulkLoadWork.add(cbld); + } + WriteEntity output = null; // Here only register the whole table for post-exec hook if no DP present @@ -9105,7 +9137,7 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { ParseContext pCtx = new ParseContext(conf, qb, child, opToPartPruner, opToPartList, topOps, topSelOps, opParseCtx, joinContext, smbMapJoinContext, topToTable, topToTableProps, fsopToTable, - loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, + loadTableWork, loadFileWork, completeBulkLoadWork, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, @@ -9740,7 +9772,7 @@ private ASTNode analyzeCreateTable(ASTNode ast, QB qb) break; default: - assert false; + throw new AssertionError("Unknown token: " + child.getToken()); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java index 2495c40..b1d3371 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java @@ -96,6 +96,7 @@ commandType.put(HiveParser.TOK_GRANT_ROLE, HiveOperation.GRANT_ROLE); commandType.put(HiveParser.TOK_REVOKE_ROLE, HiveOperation.REVOKE_ROLE); commandType.put(HiveParser.TOK_SHOW_ROLES, HiveOperation.SHOW_ROLES); + commandType.put(HiveParser.TOK_SHOW_SET_ROLE, HiveOperation.SHOW_ROLES); commandType.put(HiveParser.TOK_SHOW_ROLE_GRANT, HiveOperation.SHOW_ROLE_GRANT); commandType.put(HiveParser.TOK_ALTERDATABASE_PROPERTIES, HiveOperation.ALTERDATABASE); commandType.put(HiveParser.TOK_DESCDATABASE, HiveOperation.DESCDATABASE); @@ -215,6 +216,7 @@ public static BaseSemanticAnalyzer get(HiveConf conf, ASTNode tree) case HiveParser.TOK_ALTERTABLE_SKEWED: case HiveParser.TOK_TRUNCATETABLE: case HiveParser.TOK_EXCHANGEPARTITION: + case HiveParser.TOK_SHOW_SET_ROLE: return new DDLSemanticAnalyzer(conf); case HiveParser.TOK_ALTERTABLE_PARTITION: HiveOperation commandType = null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java index b569ed0..6ba8f1a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java @@ -51,6 +51,7 @@ import org.apache.hadoop.hive.ql.plan.CreateTableDesc; import org.apache.hadoop.hive.ql.plan.DDLWork; import org.apache.hadoop.hive.ql.plan.FetchWork; +import 
org.apache.hadoop.hive.ql.plan.HBaseCompleteBulkLoadDesc; import org.apache.hadoop.hive.ql.plan.LoadFileDesc; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.plan.MoveWork; @@ -87,6 +88,7 @@ public void compile(final ParseContext pCtx, final List loadTableWork = pCtx.getLoadTableWork(); List loadFileWork = pCtx.getLoadFileWork(); + List completeBulkLoadWork = pCtx.getCompleteBulkLoadWork(); boolean isCStats = qb.isAnalyzeRewrite(); @@ -135,7 +137,7 @@ public void compile(final ParseContext pCtx, final List tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), conf); + Task tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, null, false), conf); mvTask.add(tsk); // Check to see if we are stale'ing any indexes and auto-update them if we want if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) { @@ -188,10 +190,14 @@ public void compile(final ParseContext pCtx, final List createRevokeTask(ASTNode node, HashSet inputs, HashSet outputs) throws SemanticException; + + public Task createSetRoleTask(String roleName, + HashSet inputs, HashSet outputs) throws SemanticException; + + public Task createShowCurrentRoleTask(HashSet inputs, + HashSet outputs, Path resFile) throws SemanticException; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/authorization/HiveAuthorizationTaskFactoryImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/authorization/HiveAuthorizationTaskFactoryImpl.java index e91258a..dcee8d8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/authorization/HiveAuthorizationTaskFactoryImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/authorization/HiveAuthorizationTaskFactoryImpl.java @@ -51,6 +51,7 @@ import org.apache.hadoop.hive.ql.plan.ShowGrantDesc; import org.apache.hadoop.hive.ql.security.authorization.Privilege; import org.apache.hadoop.hive.ql.security.authorization.PrivilegeRegistry; +import org.apache.hadoop.hive.ql.security.authorization.PrivilegeType; import org.apache.hadoop.hive.ql.session.SessionState; /** * Default implementation of HiveAuthorizationTaskFactory @@ -322,7 +323,8 @@ private PrivilegeObjectDesc analyzePrivilegeObject(ASTNode ast, Privilege privObj = PrivilegeRegistry.getPrivilege(privilegeType.getType()); if (privObj == null) { - throw new SemanticException("undefined privilege " + privilegeType.getText()); + throw new SemanticException("Undefined privilege " + PrivilegeType. + getPrivTypeByToken(privilegeType.getType())); } List cols = null; if (privilegeDef.getChildCount() > 1) { @@ -370,4 +372,21 @@ private Partition getPartition(Table table, Map partSpec) private String toMessage(ErrorMsg message, Object detail) { return detail == null ? 
message.getMsg() : message.getMsg(detail.toString()); } + + @Override + public Task createSetRoleTask(String roleName, + HashSet inputs, HashSet outputs) + throws SemanticException { + return TaskFactory.get(new DDLWork(inputs, outputs, new RoleDDLDesc(roleName, + RoleDDLDesc.RoleOperation.SET_ROLE)), conf); + } + + @Override + public Task createShowCurrentRoleTask( + HashSet inputs, HashSet outputs, Path resFile) + throws SemanticException { + RoleDDLDesc ddlDesc = new RoleDDLDesc(null, RoleDDLDesc.RoleOperation.SHOW_CURRENT_ROLE); + ddlDesc.setResFile(resFile.toString()); + return TaskFactory.get(new DDLWork(inputs, outputs, ddlDesc), conf); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/HBaseCompleteBulkLoadDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/HBaseCompleteBulkLoadDesc.java new file mode 100644 index 0000000..f8df91f --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/HBaseCompleteBulkLoadDesc.java @@ -0,0 +1,17 @@ +package org.apache.hadoop.hive.ql.plan; + +import org.apache.hadoop.fs.Path; + +public class HBaseCompleteBulkLoadDesc extends LoadDesc { + private TableDesc table; + + public HBaseCompleteBulkLoadDesc(Path sourcePath, TableDesc table) { + super(sourcePath); + this.table = table; + } + + @Explain(displayName = "table") + public TableDesc getTable() { return table; } + + public void setTable(TableDesc table) { this.table = table; } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java index 407450e..01abf7a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java @@ -36,6 +36,7 @@ private LoadTableDesc loadTableWork; private LoadFileDesc loadFileWork; private LoadMultiFilesDesc loadMultiFilesWork; + private HBaseCompleteBulkLoadDesc completeBulkLoadWork; private boolean checkFileFormat; @@ -63,10 +64,11 @@ public MoveWork(HashSet inputs, HashSet outputs) { public MoveWork(HashSet inputs, HashSet outputs, final LoadTableDesc loadTableWork, final LoadFileDesc loadFileWork, - boolean checkFileFormat) { + final HBaseCompleteBulkLoadDesc completeBulkLoadWork, boolean checkFileFormat) { this(inputs, outputs); this.loadTableWork = loadTableWork; this.loadFileWork = loadFileWork; + this.completeBulkLoadWork = completeBulkLoadWork; this.checkFileFormat = checkFileFormat; } @@ -97,6 +99,15 @@ public void setLoadFileWork(final LoadFileDesc loadFileWork) { this.loadFileWork = loadFileWork; } + @Explain(displayName = "HBase completeBulkLoad") + public HBaseCompleteBulkLoadDesc getCompleteBulkLoadWork() { + return completeBulkLoadWork; + } + + public void setCompleteBulkLoadWork(HBaseCompleteBulkLoadDesc completeBulkLoadWork) { + this.completeBulkLoadWork = completeBulkLoadWork; + } + public boolean getCheckFileFormat() { return checkFileFormat; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/RoleDDLDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/RoleDDLDesc.java index 77853c5..e3d2b4a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/RoleDDLDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/RoleDDLDesc.java @@ -28,15 +28,15 @@ private static final long serialVersionUID = 1L; private String name; - + private PrincipalType principalType; - + private boolean group; private RoleOperation operation; - + private String resFile; - + private String roleOwnerName; /** @@ -60,7 +60,8 @@ public static String getRoleDescSchema() { } public static enum RoleOperation { - 
DROP_ROLE("drop_role"), CREATE_ROLE("create_role"), SHOW_ROLE_GRANT("show_role_grant"), SHOW_ROLES("show_roles"); + DROP_ROLE("drop_role"), CREATE_ROLE("create_role"), SHOW_ROLE_GRANT("show_role_grant"), + SHOW_ROLES("show_roles"), SET_ROLE("set_role"), SHOW_CURRENT_ROLE("show_current_role"); private String operationName; private RoleOperation() { @@ -74,11 +75,12 @@ public String getOperationName() { return operationName; } + @Override public String toString () { return this.operationName; } } - + public RoleDDLDesc(){ } @@ -102,7 +104,7 @@ public String getName() { public void setName(String roleName) { this.name = roleName; } - + @Explain(displayName = "role operation") public RoleOperation getOperation() { return operation; @@ -111,7 +113,7 @@ public RoleOperation getOperation() { public void setOperation(RoleOperation operation) { this.operation = operation; } - + public PrincipalType getPrincipalType() { return principalType; } @@ -127,7 +129,7 @@ public boolean getGroup() { public void setGroup(boolean group) { this.group = group; } - + public String getResFile() { return resFile; } @@ -135,7 +137,7 @@ public String getResFile() { public void setResFile(String resFile) { this.resFile = resFile; } - + public String getRoleOwnerName() { return roleOwnerName; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java index 66d4d4a..d572d74 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java @@ -34,6 +34,7 @@ private tableSpec tableSpecs; // source table spec -- for TableScanOperator private LoadTableDesc loadTableDesc; // same as MoveWork.loadTableDesc -- for FileSinkOperator private LoadFileDesc loadFileDesc; // same as MoveWork.loadFileDesc -- for FileSinkOperator + private HBaseCompleteBulkLoadDesc completeBulkLoadDesc; private String aggKey; // aggregation key prefix private boolean statsReliable; // are stats completely reliable @@ -70,6 +71,10 @@ public StatsWork(LoadFileDesc loadFileDesc) { this.loadFileDesc = loadFileDesc; } + public StatsWork(HBaseCompleteBulkLoadDesc completeBulkLoadDesc) { + this.completeBulkLoadDesc = completeBulkLoadDesc; + } + public StatsWork(boolean statsReliable) { this.statsReliable = statsReliable; } @@ -86,6 +91,10 @@ public LoadFileDesc getLoadFileDesc() { return loadFileDesc; } + public HBaseCompleteBulkLoadDesc getCompleteBulkLoadDesc() { + return completeBulkLoadDesc; + } + public void setAggKey(String aggK) { aggKey = aggK; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandProcessorFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandProcessorFactory.java index 0ad2fde..70c76b1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandProcessorFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandProcessorFactory.java @@ -45,13 +45,13 @@ private CommandProcessorFactory() { public static CommandProcessor get(String cmd) throws SQLException { - return get(cmd, null); + return get(new String[]{cmd}, null); } - public static CommandProcessor getForHiveCommand(String cmd, HiveConf conf) + public static CommandProcessor getForHiveCommand(String[] cmd, HiveConf conf) throws SQLException { HiveCommand hiveCommand = HiveCommand.find(cmd); - if (hiveCommand == null || isBlank(cmd)) { + if (hiveCommand == null || isBlank(cmd[0])) { return null; } if (conf == null) { @@ -61,8 +61,8 @@ public static CommandProcessor 
getForHiveCommand(String cmd, HiveConf conf) for (String availableCommand : conf.getVar(HiveConf.ConfVars.HIVE_SECURITY_COMMAND_WHITELIST).split(",")) { availableCommands.add(availableCommand.toLowerCase().trim()); } - if (!availableCommands.contains(cmd.trim().toLowerCase())) { - throw new SQLException("Insufficient privileges to execute " + cmd, "42000"); + if (!availableCommands.contains(cmd[0].trim().toLowerCase())) { + throw new SQLException("Insufficient privileges to execute " + cmd[0], "42000"); } switch (hiveCommand) { case SET: @@ -83,13 +83,13 @@ public static CommandProcessor getForHiveCommand(String cmd, HiveConf conf) } } - public static CommandProcessor get(String cmd, HiveConf conf) + public static CommandProcessor get(String[] cmd, HiveConf conf) throws SQLException { CommandProcessor result = getForHiveCommand(cmd, conf); if (result != null) { return result; } - if (isBlank(cmd)) { + if (isBlank(cmd[0])) { return null; } else { if (conf == null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/processors/HiveCommand.java b/ql/src/java/org/apache/hadoop/hive/ql/processors/HiveCommand.java index 280d94e..ae532f6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/processors/HiveCommand.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/processors/HiveCommand.java @@ -38,11 +38,18 @@ COMMANDS.add(command.name()); } } - public static HiveCommand find(String command) { - if (command != null) { - command = command.trim().toUpperCase(); - if (COMMANDS.contains(command)) { - return HiveCommand.valueOf(command); + public static HiveCommand find(String[] command) { + if (null == command){ + return null; + } + String cmd = command[0]; + if (cmd != null) { + cmd = cmd.trim().toUpperCase(); + if (command.length > 1 && "role".equalsIgnoreCase(command[1])) { + // special handling for set role r1 statement + return null; + } else if (COMMANDS.contains(cmd)) { + return HiveCommand.valueOf(cmd); } } return null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/HadoopDefaultAuthenticator.java b/ql/src/java/org/apache/hadoop/hive/ql/security/HadoopDefaultAuthenticator.java index b5306a1..d68d19d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/HadoopDefaultAuthenticator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/HadoopDefaultAuthenticator.java @@ -23,6 +23,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.security.UserGroupInformation; @@ -30,7 +31,7 @@ protected String userName; protected List groupNames; - + protected Configuration conf; @Override @@ -74,4 +75,9 @@ public Configuration getConf() { return this.conf; } + @Override + public void setSessionState(SessionState ss) { + //no op + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/HiveAuthenticationProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/security/HiveAuthenticationProvider.java index 1a5a87a..c77eecd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/HiveAuthenticationProvider.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/HiveAuthenticationProvider.java @@ -22,17 +22,20 @@ import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.session.SessionState; /** * HiveAuthenticationProvider is an interface for authentication. The * implementation should return userNames and groupNames. 
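The command-dispatch changes above are what make the new SET ROLE statement reachable: HiveCommand.find() now receives the full token array and deliberately returns null for "set role ...", so CommandProcessorFactory.get() does not treat the line as the SET hive command and it falls through to the regular compilation path, ending up as the SET_ROLE DDL work shown earlier. A small illustrative sketch of that behaviour (not part of the patch; the class name and printed comments are only for demonstration):

import org.apache.hadoop.hive.ql.processors.HiveCommand;

public class SetRoleDispatchSketch {
  public static void main(String[] args) {
    // An ordinary "set key=value" is still recognized as the SET hive command.
    System.out.println(HiveCommand.find(new String[] {"set", "hive.cli.print.header=true"})); // SET

    // "set role r1" is special-cased: find() returns null, so the factory does not
    // return a SET command processor and the statement is compiled as DDL instead.
    System.out.println(HiveCommand.find(new String[] {"set", "role", "r1"}));                 // null
  }
}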
*/ public interface HiveAuthenticationProvider extends Configurable{ - + public String getUserName(); - + public List getGroupNames(); - + public void destroy() throws HiveException; + public void setSessionState(SessionState ss); + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/SessionStateConfigUserAuthenticator.java b/ql/src/java/org/apache/hadoop/hive/ql/security/SessionStateConfigUserAuthenticator.java new file mode 100644 index 0000000..812105c --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/SessionStateConfigUserAuthenticator.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.security; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.session.SessionState; + +/** + * Authenticator to be used for testing and debugging. This picks the user.name + * set in SessionState config, if that is null, it returns value of + * System property user.name + */ +public class SessionStateConfigUserAuthenticator implements HiveAuthenticationProvider { + + private final List groupNames = new ArrayList(); + + protected Configuration conf; + private SessionState sessionState; + + @Override + public List getGroupNames() { + return groupNames; + } + + @Override + public String getUserName() { + String newUserName = sessionState.getConf().get("user.name"); + return newUserName != null ? newUserName : System.getProperty("user.name"); + } + + @Override + public void destroy() throws HiveException { + return; + } + + @Override + public Configuration getConf() { + return null; + } + + @Override + public void setConf(Configuration arg0) { + } + + @Override + public void setSessionState(SessionState sessionState) { + this.sessionState = sessionState; + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/SessionStateUserAuthenticator.java b/ql/src/java/org/apache/hadoop/hive/ql/security/SessionStateUserAuthenticator.java new file mode 100644 index 0000000..dad00b8 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/SessionStateUserAuthenticator.java @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.security; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.session.SessionState; + +/** + * Authenticator that returns the userName set in SessionState. For use when authorizing with HS2 + * so that HS2 can set the user for the session through SessionState + */ +public class SessionStateUserAuthenticator implements HiveAuthenticationProvider { + + private final List groupNames = new ArrayList(); + + protected Configuration conf; + private SessionState sessionState; + + public SessionStateUserAuthenticator(SessionState sessionState){ + this.sessionState = sessionState; + } + + @Override + public List getGroupNames() { + return groupNames; + } + + @Override + public String getUserName() { + return sessionState.getUserName(); + } + + @Override + public void destroy() throws HiveException { + return; + } + + @Override + public Configuration getConf() { + return null; + } + + @Override + public void setConf(Configuration arg0) { + } + + @Override + public void setSessionState(SessionState sessionState) { + this.sessionState = sessionState; + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/PrivilegeRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/PrivilegeRegistry.java index 12aa4ff..6ae2d99 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/PrivilegeRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/PrivilegeRegistry.java @@ -30,31 +30,16 @@ public class PrivilegeRegistry { protected static Map Registry = null; + protected static Map RegistryV2 = null; public static Privilege getPrivilege(PrivilegeType privilegeType) { - initializeRegistry(); return Registry.get(privilegeType); } - private static void initializeRegistry() { - if(Registry != null){ - //already initialized, nothing to do - return; - } - //population of registry done in separate synchronized call - populateRegistry(); - } - /** - * Add entries to registry. This needs to be synchronized to avoid Registry being populated - * multiple times. + * Add entries to registry. */ - private static synchronized void populateRegistry() { - //do check again in synchronized block - if(Registry != null){ - //already initialized, nothing to do - return; - } + static { Registry = new HashMap(); //add the privileges supported in authorization mode V1 @@ -68,23 +53,28 @@ private static synchronized void populateRegistry() { Registry.put(Privilege.SELECT.getPriv(), Privilege.SELECT); Registry.put(Privilege.SHOW_DATABASE.getPriv(), Privilege.SHOW_DATABASE); - if(SessionState.get().isAuthorizationModeV2()){ - //add the privileges not supported in V1 - //The list of privileges supported in V2 is implementation defined, - //so just pass everything that syntax supports. 
- Registry.put(Privilege.INSERT.getPriv(), Privilege.INSERT); - Registry.put(Privilege.DELETE.getPriv(), Privilege.DELETE); - } + + //add the privileges not supported in V1 + //The list of privileges supported in V2 is implementation defined, + //so just pass everything that syntax supports. + RegistryV2 = new HashMap(); + RegistryV2.putAll(Registry); + RegistryV2.put(Privilege.INSERT.getPriv(), Privilege.INSERT); + RegistryV2.put(Privilege.DELETE.getPriv(), Privilege.DELETE); } public static Privilege getPrivilege(int privilegeToken) { - initializeRegistry(); - return Registry.get(PrivilegeType.getPrivTypeByToken(privilegeToken)); + PrivilegeType ptype = PrivilegeType.getPrivTypeByToken(privilegeToken); + return getPrivilegeFromRegistry(ptype); } public static Privilege getPrivilege(String privilegeName) { - initializeRegistry(); - return Registry.get(PrivilegeType.getPrivTypeByName(privilegeName)); + PrivilegeType ptype = PrivilegeType.getPrivTypeByName(privilegeName); + return getPrivilegeFromRegistry(ptype); + } + + private static Privilege getPrivilegeFromRegistry(PrivilegeType ptype) { + return SessionState.get().isAuthorizationModeV2() ? RegistryV2.get(ptype) : Registry.get(ptype); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/PrivilegeType.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/PrivilegeType.java index 484861b..5c2f389 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/PrivilegeType.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/PrivilegeType.java @@ -67,9 +67,7 @@ public Integer getToken() { * @return corresponding PrivilegeType */ public static PrivilegeType getPrivTypeByToken(int token) { - if(token2Type == null){ - populateToken2Type(); - } + populateToken2Type(); PrivilegeType privType = token2Type.get(token); if(privType != null){ return privType; @@ -93,9 +91,7 @@ private static synchronized void populateToken2Type() { * @return corresponding PrivilegeType */ public static PrivilegeType getPrivTypeByName(String privilegeName) { - if(name2Type == null){ - populateName2Type(); - } + populateName2Type(); String canonicalizedName = privilegeName.toLowerCase(); PrivilegeType privType = name2Type.get(canonicalizedName); if(privType != null){ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAccessControlException.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAccessControlException.java new file mode 100644 index 0000000..f27fc65 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAccessControlException.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.security.authorization.plugin; + +import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; +import org.apache.hadoop.hive.common.classification.InterfaceStability.Evolving; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * Exception thrown by the Authorization plugin api (v2). Indicates + * that the requested action was denied because the user does not have the + * required privileges; internal plugin errors are reported through HiveAuthzPluginException instead. + */ +@LimitedPrivate(value = { "" }) +@Evolving +public class HiveAccessControlException extends HiveException{ + + private static final long serialVersionUID = 1L; + + public HiveAccessControlException(){ + } + + public HiveAccessControlException(String msg){ + super(msg); + } + + public HiveAccessControlException(String msg, Throwable cause){ + super(msg, cause); + } + + public HiveAccessControlException(Throwable cause){ + super(cause); + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAccessController.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAccessController.java index 008efb1..03ac770 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAccessController.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAccessController.java @@ -19,42 +19,50 @@ import java.util.List; +import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; +import org.apache.hadoop.hive.common.classification.InterfaceStability.Evolving; + /** * Interface that is invoked by access control commands, including grant/revoke role/privileges, * create/drop roles, and commands to read the state of authorization rules. * Methods here have corresponding methods in HiveAuthorizer, check method documentation there.
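The interface that follows declares most of its methods with both of the new exception types, and the split matters to callers: HiveAuthzPluginException reports a failure inside the plugin, while HiveAccessControlException reports that the check itself refused the action. A hedged, illustrative sketch of caller-side handling (not part of the patch; dropRoleQuietly and the messages are placeholders):

import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessController;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException;

public class DropRoleHandlingSketch {
  // Illustration only: shows how the two v2 exception types are meant to be told apart.
  static void dropRoleQuietly(HiveAccessController controller, String roleName) {
    try {
      controller.dropRole(roleName);
    } catch (HiveAccessControlException denied) {
      // The authorization check refused the action for the current user.
      System.err.println("Permission denied dropping role " + roleName + ": " + denied.getMessage());
    } catch (HiveAuthzPluginException internalError) {
      // The plugin itself failed (for example, the metastore could not be reached).
      System.err.println("Authorization plugin error: " + internalError.getMessage());
    }
  }
}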
*/ +@LimitedPrivate(value = { "" }) +@Evolving public interface HiveAccessController { void grantPrivileges(List hivePrincipals, List hivePrivileges, HivePrivilegeObject hivePrivObject, HivePrincipal grantorPrincipal, boolean grantOption) - throws HiveAuthorizationPluginException;; + throws HiveAuthzPluginException, HiveAccessControlException; void revokePrivileges(List hivePrincipals, List hivePrivileges, HivePrivilegeObject hivePrivObject, HivePrincipal grantorPrincipal, boolean grantOption) - throws HiveAuthorizationPluginException;; + throws HiveAuthzPluginException, HiveAccessControlException; void createRole(String roleName, HivePrincipal adminGrantor) - throws HiveAuthorizationPluginException; + throws HiveAuthzPluginException, HiveAccessControlException; void dropRole(String roleName) - throws HiveAuthorizationPluginException; + throws HiveAuthzPluginException, HiveAccessControlException; List getRoles(HivePrincipal hivePrincipal) - throws HiveAuthorizationPluginException; + throws HiveAuthzPluginException, HiveAccessControlException; void grantRole(List hivePrincipals, List roles, boolean grantOption, HivePrincipal grantorPrinc) - throws HiveAuthorizationPluginException; + throws HiveAuthzPluginException, HiveAccessControlException; void revokeRole(List hivePrincipals, List roles, boolean grantOption, HivePrincipal grantorPrinc) - throws HiveAuthorizationPluginException; + throws HiveAuthzPluginException, HiveAccessControlException; List getAllRoles() - throws HiveAuthorizationPluginException; + throws HiveAuthzPluginException, HiveAccessControlException; List showPrivileges(HivePrincipal principal, HivePrivilegeObject privObj) - throws HiveAuthorizationPluginException; + throws HiveAuthzPluginException, HiveAccessControlException; + + void setCurrentRole(String roleName) throws HiveAuthzPluginException; + List getCurrentRoles() throws HiveAuthzPluginException; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationPluginException.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationPluginException.java deleted file mode 100644 index 3ab8a9a..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationPluginException.java +++ /dev/null @@ -1,47 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.security.authorization.plugin; - -import org.apache.hadoop.hive.common.classification.InterfaceAudience.Public; -import org.apache.hadoop.hive.ql.metadata.HiveException; - -/** - * Exception thrown by the Authorization plugin api (v2) - */ -@Public -public class HiveAuthorizationPluginException extends HiveException{ - - private static final long serialVersionUID = 1L; - - public HiveAuthorizationPluginException(){ - } - - public HiveAuthorizationPluginException(String msg){ - super(msg); - } - - public HiveAuthorizationPluginException(String msg, Throwable cause){ - super(msg, cause); - } - - public HiveAuthorizationPluginException(Throwable cause){ - super(cause); - } - -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationValidator.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationValidator.java index 59367dd..7ffbc44 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationValidator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationValidator.java @@ -19,24 +19,28 @@ import java.util.List; -import org.apache.hadoop.hive.common.classification.InterfaceAudience.Public; +import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; import org.apache.hadoop.hive.common.classification.InterfaceStability.Evolving; /** * Interface used to check if user has privileges to perform certain action. * Methods here have corresponding methods in HiveAuthorizer, check method documentation there. */ -@Public +@LimitedPrivate(value = { "" }) @Evolving public interface HiveAuthorizationValidator { + /** - * Check if current user has privileges to perform given operation type hiveOpType on the given - * input and output objects + * Check if current user has privileges to perform given operation type + * hiveOpType on the given input and output objects + * * @param hiveOpType * @param inputHObjs * @param outputHObjs + * @throws HiveAuthzPluginException + * @throws HiveAccessControlException */ void checkPrivileges(HiveOperationType hiveOpType, List inputHObjs, - List outputHObjs) throws HiveAuthorizationPluginException; + List outputHObjs) throws HiveAuthzPluginException, HiveAccessControlException; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizer.java index 632901e..c50a78b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizer.java @@ -19,7 +19,7 @@ import java.util.List; -import org.apache.hadoop.hive.common.classification.InterfaceAudience.Public; +import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; import org.apache.hadoop.hive.common.classification.InterfaceStability.Evolving; import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; @@ -33,7 +33,7 @@ * statements and does not make assumptions about the privileges needed for a hive operation. * This is referred to as V2 authorizer in other parts of the code. 
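Since HiveAuthorizationValidator has just the single checkPrivileges method, a toy implementation makes the contract concrete. The sketch below is not from the patch; it allows read-only operations and rejects anything that produces outputs, and the list parameters are written as raw List because the generic type arguments are elided in this extract:

import java.util.List;

import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizationValidator;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType;

public class DenyWritesValidator implements HiveAuthorizationValidator {
  @Override
  public void checkPrivileges(HiveOperationType hiveOpType, List inputHObjs, List outputHObjs)
      throws HiveAuthzPluginException, HiveAccessControlException {
    if (outputHObjs != null && !outputHObjs.isEmpty()) {
      // Refusing the action is signalled with HiveAccessControlException, not a plugin error.
      throw new HiveAccessControlException("Toy policy: " + hiveOpType + " writes to " + outputHObjs);
    }
    // No outputs: treat the operation as read-only and allow it.
  }
}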
*/ -@Public +@LimitedPrivate(value = { "" }) @Evolving public interface HiveAuthorizer { @@ -51,11 +51,12 @@ * @param hivePrivObject * @param grantorPrincipal * @param grantOption - * @throws HiveAuthorizationPluginException + * @throws HiveAuthzPluginException + * @throws HiveAccessControlException */ void grantPrivileges(List hivePrincipals, List hivePrivileges, HivePrivilegeObject hivePrivObject, HivePrincipal grantorPrincipal, boolean grantOption) - throws HiveAuthorizationPluginException; + throws HiveAuthzPluginException, HiveAccessControlException; /** * Revoke privileges for principals on the object @@ -64,38 +65,42 @@ void grantPrivileges(List hivePrincipals, List hiv * @param hivePrivObject * @param grantorPrincipal * @param grantOption - * @throws HiveAuthorizationPluginException + * @throws HiveAuthzPluginException + * @throws HiveAccessControlException */ void revokePrivileges(List hivePrincipals, List hivePrivileges, HivePrivilegeObject hivePrivObject, HivePrincipal grantorPrincipal, boolean grantOption) - throws HiveAuthorizationPluginException; + throws HiveAuthzPluginException, HiveAccessControlException; /** * Create role * @param roleName * @param adminGrantor - The user in "[ WITH ADMIN ]" clause of "create role" - * @throws HiveAuthorizationPluginException + * @throws HiveAuthzPluginException + * @throws HiveAccessControlException */ void createRole(String roleName, HivePrincipal adminGrantor) - throws HiveAuthorizationPluginException; + throws HiveAuthzPluginException, HiveAccessControlException; /** * Drop role * @param roleName - * @throws HiveAuthorizationPluginException + * @throws HiveAuthzPluginException + * @throws HiveAccessControlException */ void dropRole(String roleName) - throws HiveAuthorizationPluginException; + throws HiveAuthzPluginException, HiveAccessControlException; /** * Get roles that this user/role belongs to * @param hivePrincipal - user or role * @return list of roles - * @throws HiveAuthorizationPluginException + * @throws HiveAuthzPluginException + * @throws HiveAccessControlException */ List getRoles(HivePrincipal hivePrincipal) - throws HiveAuthorizationPluginException; + throws HiveAuthzPluginException, HiveAccessControlException; /** * Grant roles in given roles list to principals in given hivePrincipals list @@ -103,11 +108,12 @@ void dropRole(String roleName) * @param roles * @param grantOption * @param grantorPrinc - * @throws HiveAuthorizationPluginException + * @throws HiveAuthzPluginException + * @throws HiveAccessControlException */ void grantRole(List hivePrincipals, List roles, boolean grantOption, HivePrincipal grantorPrinc) - throws HiveAuthorizationPluginException; + throws HiveAuthzPluginException, HiveAccessControlException; /** @@ -116,41 +122,47 @@ void grantRole(List hivePrincipals, List roles, boolean g * @param roles * @param grantOption * @param grantorPrinc - * @throws HiveAuthorizationPluginException + * @throws HiveAuthzPluginException + * @throws HiveAccessControlException */ void revokeRole(List hivePrincipals, List roles, boolean grantOption, HivePrincipal grantorPrinc) - throws HiveAuthorizationPluginException; + throws HiveAuthzPluginException, HiveAccessControlException; /** * Check if user has privileges to do this action on these objects * @param hiveOpType * @param inputsHObjs * @param outputHObjs - * @throws HiveAuthorizationPluginException + * @throws HiveAuthzPluginException + * @throws HiveAccessControlException */ void checkPrivileges(HiveOperationType hiveOpType, List inputsHObjs, List outputHObjs) - 
throws HiveAuthorizationPluginException; + throws HiveAuthzPluginException, HiveAccessControlException; /** * @return all existing roles - * @throws HiveAuthorizationPluginException + * @throws HiveAuthzPluginException + * @throws HiveAccessControlException */ List getAllRoles() - throws HiveAuthorizationPluginException; + throws HiveAuthzPluginException, HiveAccessControlException; /** * Show privileges for given principal on given object * @param principal * @param privObj * @return - * @throws HiveAuthorizationPluginException + * @throws HiveAuthzPluginException + * @throws HiveAccessControlException */ List showPrivileges(HivePrincipal principal, HivePrivilegeObject privObj) - throws HiveAuthorizationPluginException; + throws HiveAuthzPluginException, HiveAccessControlException; + void setCurrentRole(String roleName) throws HiveAuthzPluginException; + List getCurrentRoles() throws HiveAuthzPluginException; //other functions to be added - //showUsersInRole(rolename) //isSuperuser(username) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerFactory.java index c004105..876527a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerFactory.java @@ -17,16 +17,17 @@ */ package org.apache.hadoop.hive.ql.security.authorization.plugin; -import org.apache.hadoop.hive.common.classification.InterfaceAudience.Public; +import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; import org.apache.hadoop.hive.common.classification.InterfaceStability.Evolving; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider; /** * Implementation of this interface specified through hive configuration will be used to * create {@link HiveAuthorizer} instance used for hive authorization. * */ -@Public +@LimitedPrivate(value = { "" }) @Evolving public interface HiveAuthorizerFactory { /** @@ -35,9 +36,10 @@ * for the current thread. Each invocation of method in HiveAuthorizer can happen in * different thread, so get the current instance in each method invocation. 
* @param conf - current HiveConf - * @param hiveCurrentUser - user for current session + * @param hiveAuthenticator - authenticator, provides user name * @return new instance of HiveAuthorizer + * @throws HiveAuthzPluginException */ HiveAuthorizer createHiveAuthorizer(HiveMetastoreClientFactory metastoreClientFactory, - HiveConf conf, String hiveCurrentUser); + HiveConf conf, HiveAuthenticationProvider hiveAuthenticator) throws HiveAuthzPluginException; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerImpl.java index 172746e..67b2ba1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerImpl.java @@ -19,7 +19,7 @@ import java.util.List; -import org.apache.hadoop.hive.common.classification.InterfaceAudience.Public; +import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; import org.apache.hadoop.hive.common.classification.InterfaceStability.Evolving; /** @@ -28,7 +28,7 @@ * {@link HiveAccessController} and {@link HiveAuthorizationValidator} to constructor. * */ -@Public +@LimitedPrivate(value = { "" }) @Evolving public class HiveAuthorizerImpl implements HiveAuthorizer { HiveAccessController accessController; @@ -42,7 +42,7 @@ public HiveAuthorizerImpl(HiveAccessController accessController, HiveAuthorizati @Override public void grantPrivileges(List hivePrincipals, List hivePrivileges, HivePrivilegeObject hivePrivObject, - HivePrincipal grantorPrincipal, boolean grantOption) throws HiveAuthorizationPluginException { + HivePrincipal grantorPrincipal, boolean grantOption) throws HiveAuthzPluginException, HiveAccessControlException { accessController.grantPrivileges(hivePrincipals, hivePrivileges, hivePrivObject, grantorPrincipal, grantOption); } @@ -50,52 +50,52 @@ public void grantPrivileges(List hivePrincipals, @Override public void revokePrivileges(List hivePrincipals, List hivePrivileges, HivePrivilegeObject hivePrivObject, - HivePrincipal grantorPrincipal, boolean grantOption) throws HiveAuthorizationPluginException { + HivePrincipal grantorPrincipal, boolean grantOption) throws HiveAuthzPluginException, HiveAccessControlException { accessController.revokePrivileges(hivePrincipals, hivePrivileges, hivePrivObject, grantorPrincipal, grantOption); } @Override - public void createRole(String roleName, HivePrincipal adminGrantor) throws HiveAuthorizationPluginException { + public void createRole(String roleName, HivePrincipal adminGrantor) throws HiveAuthzPluginException, HiveAccessControlException { accessController.createRole(roleName, adminGrantor); } @Override - public void dropRole(String roleName) throws HiveAuthorizationPluginException { + public void dropRole(String roleName) throws HiveAuthzPluginException, HiveAccessControlException { accessController.dropRole(roleName); } @Override - public List getRoles(HivePrincipal hivePrincipal) throws HiveAuthorizationPluginException { + public List getRoles(HivePrincipal hivePrincipal) throws HiveAuthzPluginException, HiveAccessControlException { return accessController.getRoles(hivePrincipal); } @Override public void grantRole(List hivePrincipals, List roles, - boolean grantOption, HivePrincipal grantorPrinc) throws HiveAuthorizationPluginException { + boolean grantOption, HivePrincipal grantorPrinc) throws HiveAuthzPluginException, 
HiveAccessControlException { accessController.grantRole(hivePrincipals, roles, grantOption, grantorPrinc); } @Override public void revokeRole(List hivePrincipals, List roles, - boolean grantOption, HivePrincipal grantorPrinc) throws HiveAuthorizationPluginException { + boolean grantOption, HivePrincipal grantorPrinc) throws HiveAuthzPluginException, HiveAccessControlException { accessController.revokeRole(hivePrincipals, roles, grantOption, grantorPrinc); } @Override public void checkPrivileges(HiveOperationType hiveOpType, List inputHObjs, - List outputHObjs) throws HiveAuthorizationPluginException { + List outputHObjs) throws HiveAuthzPluginException, HiveAccessControlException { authValidator.checkPrivileges(hiveOpType, inputHObjs, outputHObjs); } @Override - public List getAllRoles() throws HiveAuthorizationPluginException { + public List getAllRoles() throws HiveAuthzPluginException, HiveAccessControlException { return accessController.getAllRoles(); } @Override public List showPrivileges(HivePrincipal principal, - HivePrivilegeObject privObj) throws HiveAuthorizationPluginException { + HivePrivilegeObject privObj) throws HiveAuthzPluginException, HiveAccessControlException { return accessController.showPrivileges(principal, privObj); } @@ -104,6 +104,16 @@ public VERSION getVersion() { return VERSION.V1; } + @Override + public void setCurrentRole(String roleName) throws HiveAuthzPluginException { + accessController.setCurrentRole(roleName); + } + + @Override + public List getCurrentRoles() throws HiveAuthzPluginException { + return accessController.getCurrentRoles(); + } + // other access control functions diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthzPluginException.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthzPluginException.java new file mode 100644 index 0000000..7e99930 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthzPluginException.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.security.authorization.plugin; + +import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; +import org.apache.hadoop.hive.common.classification.InterfaceStability.Evolving; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * Exception thrown by the Authorization plugin api (v2). Indicates + * an error while performing authorization, and not a authorization being + * denied. 
+ */ +@LimitedPrivate(value = { "" }) +@Evolving +public class HiveAuthzPluginException extends HiveException{ + + private static final long serialVersionUID = 1L; + + public HiveAuthzPluginException(){ + } + + public HiveAuthzPluginException(String msg){ + super(msg); + } + + public HiveAuthzPluginException(String msg, Throwable cause){ + super(msg, cause); + } + + public HiveAuthzPluginException(Throwable cause){ + super(cause); + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactory.java index 4208b2d..f994f20 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactory.java @@ -17,14 +17,14 @@ */ package org.apache.hadoop.hive.ql.security.authorization.plugin; -import java.io.IOException; - -import org.apache.hadoop.hive.common.classification.InterfaceAudience.Public; +import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; +import org.apache.hadoop.hive.common.classification.InterfaceStability.Evolving; import org.apache.hadoop.hive.metastore.IMetaStoreClient; /** * Factory for getting current valid instance of IMetaStoreClient */ -@Public +@LimitedPrivate(value = { "" }) +@Evolving public interface HiveMetastoreClientFactory { - IMetaStoreClient getHiveMetastoreClient() throws IOException; + IMetaStoreClient getHiveMetastoreClient() throws HiveAuthzPluginException; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactoryImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactoryImpl.java index 1fadb3e..019f600 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactoryImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactoryImpl.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.ql.security.authorization.plugin; -import java.io.IOException; - import org.apache.hadoop.hive.common.classification.InterfaceAudience.Private; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.MetaException; @@ -32,13 +30,14 @@ public class HiveMetastoreClientFactoryImpl implements HiveMetastoreClientFactory{ @Override - public IMetaStoreClient getHiveMetastoreClient() throws IOException { + public IMetaStoreClient getHiveMetastoreClient() throws HiveAuthzPluginException { + String errMsg = "Error getting metastore client"; try { return Hive.get().getMSC(); } catch (MetaException e) { - throw new IOException(e); + throw new HiveAuthzPluginException(errMsg, e); } catch (HiveException e) { - throw new IOException(e); + throw new HiveAuthzPluginException(errMsg, e); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java index 0fcfe52..f1671ba 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java @@ -17,12 +17,14 @@ */ package org.apache.hadoop.hive.ql.security.authorization.plugin; -import 
org.apache.hadoop.hive.common.classification.InterfaceAudience.Public; +import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; +import org.apache.hadoop.hive.common.classification.InterfaceStability.Evolving; /** * List of hive operations types. */ -@Public +@LimitedPrivate(value = { "" }) +@Evolving public enum HiveOperationType { EXPLAIN, LOAD, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrincipal.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrincipal.java index 42e9f23..62b8994 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrincipal.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrincipal.java @@ -17,15 +17,25 @@ */ package org.apache.hadoop.hive.ql.security.authorization.plugin; +import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; +import org.apache.hadoop.hive.common.classification.InterfaceStability.Evolving; + /** * Represents the user or role in grant/revoke statements */ +@LimitedPrivate(value = { "" }) +@Evolving public class HivePrincipal { public enum HivePrincipalType{ USER, ROLE, UNKNOWN } + @Override + public String toString() { + return "Principal [name=" + name + ", type=" + type + "]"; + } + private final String name; private final HivePrincipalType type; @@ -40,4 +50,32 @@ public HivePrincipalType getType() { return type; } + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((name == null) ? 0 : name.hashCode()); + result = prime * result + ((type == null) ? 0 : type.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + HivePrincipal other = (HivePrincipal) obj; + if (name == null) { + if (other.name != null) + return false; + } else if (!name.equals(other.name)) + return false; + if (type != other.type) + return false; + return true; + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilege.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilege.java index 4b9d133..126300a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilege.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilege.java @@ -18,16 +18,27 @@ package org.apache.hadoop.hive.ql.security.authorization.plugin; import java.util.List; +import java.util.Locale; + +import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; +import org.apache.hadoop.hive.common.classification.InterfaceStability.Evolving; /** * Represents the hive privilege being granted/revoked */ +@LimitedPrivate(value = { "" }) +@Evolving public class HivePrivilege { + @Override + public String toString() { + return "Privilege [name=" + name + ", columns=" + columns + "]"; + } + private final String name; private final List columns; public HivePrivilege(String name, List columns){ - this.name = name; + this.name = name.toUpperCase(Locale.US); this.columns = columns; } @@ -39,4 +50,37 @@ public String getName() { return columns; } + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((columns == null) ? 0 : columns.hashCode()); + result = prime * result + ((name == null) ? 
0 : name.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + HivePrivilege other = (HivePrivilege) obj; + if (columns == null) { + if (other.columns != null) + return false; + } else if (!columns.equals(other.columns)) + return false; + if (name == null) { + if (other.name != null) + return false; + } else if (!name.equals(other.name)) + return false; + return true; + } + + + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeInfo.java index 829c85d..3f9fa81 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeInfo.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeInfo.java @@ -17,15 +17,14 @@ */ package org.apache.hadoop.hive.ql.security.authorization.plugin; -import org.apache.hadoop.hive.common.classification.InterfaceAudience.Public; +import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; import org.apache.hadoop.hive.common.classification.InterfaceStability.Evolving; /** * Represents a privilege granted for an object to a principal */ -@Public +@LimitedPrivate(value = { "" }) @Evolving - public class HivePrivilegeInfo{ private final HivePrincipal principal; private final HivePrivilege privilege; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java index 5b101c2..a774773 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java @@ -17,16 +17,22 @@ */ package org.apache.hadoop.hive.ql.security.authorization.plugin; -import org.apache.hadoop.hive.common.classification.InterfaceAudience.Public; +import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; import org.apache.hadoop.hive.common.classification.InterfaceStability.Unstable; /** * Represents the object on which privilege is being granted/revoked */ -@Public +@LimitedPrivate(value = { "" }) @Unstable public class HivePrivilegeObject { + @Override + public String toString() { + return "Hive Object [type=" + type + ", dbname=" + dbname + ", table/viewname=" + + tableviewname + "]"; + } + public enum HivePrivilegeObjectType { DATABASE, TABLE, VIEW, PARTITION, URI}; private final HivePrivilegeObjectType type; private final String dbname; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveRole.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveRole.java index 8ea9822..a23239b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveRole.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveRole.java @@ -17,9 +17,13 @@ */ package org.apache.hadoop.hive.ql.security.authorization.plugin; +import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; +import org.apache.hadoop.hive.common.classification.InterfaceStability.Evolving; import org.apache.hadoop.hive.metastore.api.Role; // same with thrift.Role +@LimitedPrivate(value = { "" }) +@Evolving public class HiveRole { private 
String roleName; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/package-info.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/package-info.java new file mode 100644 index 0000000..ef8f19d --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/package-info.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * This package provides interfaces and classes that can be used to implement custom authorization for hive. + * + * How hive code uses this interface: + * The interface that hive code invokes is HiveAuthorizer class. + * The classes HivePrincipal, HivePrivilege, HivePrivilegeObject, HivePrivilegeInfo, HiveOperationType + * are arguments used in the authorization interface. + * The methods in the interface throw two types of exceptions - HiveAuthzPluginException (in + * case of internal errors), and HiveAccessControlException (when the action is not permitted + * because authorization has been denied). + * + * Hive uses the HiveAuthorizerFactory interface, whose implementing class is configurable through + * hive configuration, to instantiate an instance of this interface. + * + * + * Guide on implementing the interface: + * There are two categories of operations to be handled by the authorization interface. The first is the + * actions performed by the access control statements, which update the privileges that have + * been granted (storing them somewhere such as the metastore database) and also retrieve the current + * state of privileges. You may choose not to implement this part and just use a no-op implementation + * if you are going to manage authorization externally (e.g. if you base it on a mapping to + * file system permissions). + * The second category of operation is authorizing the hive actions by checking them against the privileges + * the user has on the objects. + * HiveAccessController is the interface for the first category of operations and + * HiveAuthorizationValidator is the interface for the second category. + * + * HiveAuthorizerImpl is a convenience class that you can use by just passing the implementations + * of these two interfaces (HiveAccessController, HiveAuthorizationValidator) in the constructor.
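As a concrete companion to the guide above, here is one way the pieces could be wired together: a factory that plugs a do-nothing access controller and an allow-everything validator into HiveAuthorizerImpl, which is the "manage authorization externally" case the guide describes. This is a hedged sketch, not part of the patch; the class names are hypothetical, the list parameters are raw because the generic type arguments are elided in this extract, and such a factory would presumably be selected through the hive.security.authorization.manager setting that the patch also adds to the restricted list.

import java.util.Collections;
import java.util.List;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessController;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizationValidator;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizer;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizerFactory;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizerImpl;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveMetastoreClientFactory;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrincipal;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject;

// Hypothetical factory: privileges are managed outside hive, so the access
// controller is a no-op and the validator permits every operation.
public class NoOpAuthorizerFactory implements HiveAuthorizerFactory {

  @Override
  public HiveAuthorizer createHiveAuthorizer(HiveMetastoreClientFactory metastoreClientFactory,
      HiveConf conf, HiveAuthenticationProvider hiveAuthenticator) throws HiveAuthzPluginException {
    HiveAuthorizationValidator allowAll = new HiveAuthorizationValidator() {
      @Override
      public void checkPrivileges(HiveOperationType hiveOpType, List inputHObjs, List outputHObjs) {
        // Never throws HiveAccessControlException: every operation is permitted.
      }
    };
    return new HiveAuthorizerImpl(new NoOpAccessController(), allowAll);
  }

  /** Access control statements become no-ops; read methods return empty results. */
  private static class NoOpAccessController implements HiveAccessController {
    @Override public void grantPrivileges(List hivePrincipals, List hivePrivileges,
        HivePrivilegeObject hivePrivObject, HivePrincipal grantorPrincipal, boolean grantOption) { }
    @Override public void revokePrivileges(List hivePrincipals, List hivePrivileges,
        HivePrivilegeObject hivePrivObject, HivePrincipal grantorPrincipal, boolean grantOption) { }
    @Override public void createRole(String roleName, HivePrincipal adminGrantor) { }
    @Override public void dropRole(String roleName) { }
    @Override public List getRoles(HivePrincipal hivePrincipal) { return Collections.emptyList(); }
    @Override public void grantRole(List hivePrincipals, List roles, boolean grantOption,
        HivePrincipal grantorPrinc) { }
    @Override public void revokeRole(List hivePrincipals, List roles, boolean grantOption,
        HivePrincipal grantorPrinc) { }
    @Override public List getAllRoles() { return Collections.emptyList(); }
    @Override public List showPrivileges(HivePrincipal principal, HivePrivilegeObject privObj) {
      return Collections.emptyList();
    }
    @Override public void setCurrentRole(String roleName) { }
    @Override public List getCurrentRoles() { return Collections.emptyList(); }
  }
}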
+ * + */ +package org.apache.hadoop.hive.ql.security.authorization.plugin; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/GrantPrivAuthUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/GrantPrivAuthUtils.java new file mode 100644 index 0000000..0c535be --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/GrantPrivAuthUtils.java @@ -0,0 +1,88 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd; + +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrincipal; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrincipal.HivePrincipalType; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilege; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; + +/** + * Utility class to authorize grant/revoke privileges + */ +public class GrantPrivAuthUtils { + + static void authorize(List hivePrincipals, List hivePrivileges, + HivePrivilegeObject hivePrivObject, boolean grantOption, IMetaStoreClient metastoreClient, + String userName) + throws HiveAuthzPluginException, HiveAccessControlException { + + // check if this user has grant privileges for this privileges on this + // object + + // map priv being granted to required privileges + RequiredPrivileges reqPrivs = getGrantRequiredPrivileges(hivePrivileges); + + // api for checking required privileges for a user + checkRequiredPrivileges(hivePrincipals, reqPrivs, hivePrivObject, metastoreClient, userName); + } + + private static void checkRequiredPrivileges(List hivePrincipals, + RequiredPrivileges reqPrivs, HivePrivilegeObject hivePrivObject, + IMetaStoreClient metastoreClient, String userName) + throws HiveAuthzPluginException, HiveAccessControlException { + + for (HivePrincipal hivePrincipal : hivePrincipals) { + checkRequiredPrivileges(hivePrincipal, reqPrivs, hivePrivObject, metastoreClient, userName); + } + } + + private static void checkRequiredPrivileges(HivePrincipal hivePrincipal, + RequiredPrivileges reqPrivileges, HivePrivilegeObject hivePrivObject, + IMetaStoreClient metastoreClient, String userName) + throws HiveAuthzPluginException, HiveAccessControlException { + + // keep track of the principals on which privileges have been checked for + // this object + + // get privileges for this user and its roles on this object + 
RequiredPrivileges availPrivs = SQLAuthorizationUtils.getPrivilegesFromMetaStore( + metastoreClient, userName, hivePrivObject); + + // check if required privileges is subset of available privileges + Collection missingPrivs = reqPrivileges.findMissingPrivs(availPrivs); + SQLAuthorizationUtils.assertNoMissingPrivilege(missingPrivs, new HivePrincipal(userName, + HivePrincipalType.USER), hivePrivObject); + } + + private static RequiredPrivileges getGrantRequiredPrivileges(List hivePrivileges) + throws HiveAuthzPluginException { + RequiredPrivileges reqPrivs = new RequiredPrivileges(); + for (HivePrivilege hivePriv : hivePrivileges) { + reqPrivs.addPrivilege(hivePriv.getName(), true /* grant priv required */); + } + return reqPrivs; + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java new file mode 100644 index 0000000..e448cba --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java @@ -0,0 +1,204 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; + +/** + * Mapping of operation to its required input and output privileges + */ +public class Operation2Privilege { + + private static class InOutPrivs { + private final SQLPrivTypeGrant[] inputPrivs; + private final SQLPrivTypeGrant[] outputPrivs; + + InOutPrivs(SQLPrivTypeGrant[] inputPrivs, SQLPrivTypeGrant[] outputPrivs) { + this.inputPrivs = inputPrivs; + this.outputPrivs = outputPrivs; + } + + private SQLPrivTypeGrant[] getInputPrivs() { + return inputPrivs; + } + + private SQLPrivTypeGrant[] getOutputPrivs() { + return outputPrivs; + } + } + + private static Map op2Priv; + + private static SQLPrivTypeGrant[] OWNER_PRIV_AR = arr(SQLPrivTypeGrant.OWNER_PRIV); + private static SQLPrivTypeGrant[] SEL_NOGRANT_AR = arr(SQLPrivTypeGrant.SELECT_NOGRANT); + private static SQLPrivTypeGrant[] SEL_GRANT_AR = arr(SQLPrivTypeGrant.SELECT_WGRANT); + private static SQLPrivTypeGrant[] ADMIN_PRIV_AR = arr(SQLPrivTypeGrant.ADMIN_PRIV); + + static { + op2Priv = new HashMap(); + + op2Priv.put(HiveOperationType.EXPLAIN, new InOutPrivs(SEL_NOGRANT_AR, + SEL_NOGRANT_AR)); //?? 
+ op2Priv.put(HiveOperationType.LOAD, new InOutPrivs(ADMIN_PRIV_AR, null)); + // select with grant for exporting contents + op2Priv.put(HiveOperationType.EXPORT, new InOutPrivs(SEL_GRANT_AR, null)); + + op2Priv.put(HiveOperationType.IMPORT, new InOutPrivs(ADMIN_PRIV_AR, null)); + + op2Priv.put(HiveOperationType.CREATEDATABASE, new InOutPrivs(ADMIN_PRIV_AR, null)); + op2Priv.put(HiveOperationType.DROPDATABASE, new InOutPrivs(ADMIN_PRIV_AR, null)); + //this should be database usage privilege once it is supported + op2Priv.put(HiveOperationType.SWITCHDATABASE, new InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.LOCKDB, new InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.UNLOCKDB, new InOutPrivs(null, null)); + + op2Priv.put(HiveOperationType.DROPTABLE, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.DESCTABLE, new InOutPrivs(SEL_NOGRANT_AR, null)); + op2Priv.put(HiveOperationType.DESCFUNCTION, new InOutPrivs(null, null)); + + //meta store check command - require admin priv + op2Priv.put(HiveOperationType.MSCK, new InOutPrivs(ADMIN_PRIV_AR, null)); + + //alter table commands require table ownership + op2Priv.put(HiveOperationType.ALTERTABLE_ADDCOLS, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_REPLACECOLS, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_RENAMECOL, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_RENAMEPART, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_RENAME, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_DROPPARTS, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_ADDPARTS, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_TOUCH, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_ARCHIVE, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_UNARCHIVE, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_PROPERTIES, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_SERIALIZER, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_PARTCOLTYPE, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERPARTITION_SERIALIZER, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_SERDEPROPERTIES, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERPARTITION_SERDEPROPERTIES, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_CLUSTER_SORT, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_BUCKETNUM, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERPARTITION_BUCKETNUM, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_PROTECTMODE, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERPARTITION_PROTECTMODE, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_FILEFORMAT, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERPARTITION_FILEFORMAT, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_LOCATION, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERPARTITION_LOCATION, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_MERGEFILES, new 
InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.ALTERPARTITION_MERGEFILES, new InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.ALTERTABLE_SKEWED, new InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.ALTERTBLPART_SKEWED_LOCATION, new InOutPrivs(null, null)); + + op2Priv.put(HiveOperationType.ANALYZE_TABLE, new InOutPrivs(arr(SQLPrivTypeGrant.SELECT_NOGRANT, SQLPrivTypeGrant.INSERT_NOGRANT), null)); + op2Priv.put(HiveOperationType.SHOWDATABASES, new InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.SHOWTABLES, new InOutPrivs(null, null)); + + op2Priv.put(HiveOperationType.SHOWCOLUMNS, new InOutPrivs(SEL_NOGRANT_AR, null)); + op2Priv.put(HiveOperationType.SHOW_TABLESTATUS, new InOutPrivs(SEL_NOGRANT_AR, null)); + op2Priv.put(HiveOperationType.SHOW_TBLPROPERTIES, new InOutPrivs(SEL_NOGRANT_AR, null)); + + //show create table is more sensitive information, includes table properties etc + // for now require select WITH GRANT + op2Priv.put(HiveOperationType.SHOW_CREATETABLE, new InOutPrivs(SEL_GRANT_AR, null)); + + op2Priv.put(HiveOperationType.SHOWFUNCTIONS, new InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.SHOWINDEXES, new InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.SHOWPARTITIONS, new InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.SHOWLOCKS, new InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.CREATEFUNCTION, new InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.DROPFUNCTION, new InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.CREATEMACRO, new InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.DROPMACRO, new InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.CREATEVIEW, new InOutPrivs(SEL_GRANT_AR, null)); + + // require view ownership + op2Priv.put(HiveOperationType.DROPVIEW, new InOutPrivs(OWNER_PRIV_AR, null)); + + //table ownership for create/drop/alter index + op2Priv.put(HiveOperationType.CREATEINDEX, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.DROPINDEX, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERINDEX_REBUILD, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERINDEX_PROPS, new InOutPrivs(OWNER_PRIV_AR, null)); + + // require view ownership for alter/drop view + op2Priv.put(HiveOperationType.ALTERVIEW_PROPERTIES, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.DROPVIEW_PROPERTIES, new InOutPrivs(OWNER_PRIV_AR, null)); + op2Priv.put(HiveOperationType.ALTERVIEW_RENAME, new InOutPrivs(OWNER_PRIV_AR, null)); + + op2Priv.put(HiveOperationType.LOCKTABLE, new InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.UNLOCKTABLE, new InOutPrivs(null, null)); + + // require db ownership + op2Priv.put(HiveOperationType.CREATETABLE, new InOutPrivs(OWNER_PRIV_AR, null)); + + // require table ownership + op2Priv.put(HiveOperationType.TRUNCATETABLE, new InOutPrivs(OWNER_PRIV_AR, null)); + + op2Priv.put(HiveOperationType.CREATETABLE_AS_SELECT, new InOutPrivs(OWNER_PRIV_AR, SEL_NOGRANT_AR)); + op2Priv.put(HiveOperationType.QUERY, new InOutPrivs(SEL_NOGRANT_AR, null)); + + op2Priv.put(HiveOperationType.ALTERDATABASE, new InOutPrivs(ADMIN_PRIV_AR, null)); + op2Priv.put(HiveOperationType.DESCDATABASE, new InOutPrivs(null, null)); + + // The following actions are authorized through SQLStdHiveAccessController, + // and it is not using this privilege mapping, but it might make sense to move it here + op2Priv.put(HiveOperationType.CREATEROLE, new InOutPrivs(null, null)); + 
op2Priv.put(HiveOperationType.DROPROLE, new InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.GRANT_PRIVILEGE, new InOutPrivs(null, + null)); + op2Priv.put(HiveOperationType.REVOKE_PRIVILEGE, new InOutPrivs(null, + null)); + op2Priv.put(HiveOperationType.SHOW_GRANT, new InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.GRANT_ROLE, new InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.REVOKE_ROLE, new InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.SHOW_ROLES, new InOutPrivs(null, null)); + op2Priv.put(HiveOperationType.SHOW_ROLE_GRANT, new InOutPrivs(null, + null)); + + } + + /** + * Convenience method so that creation of this array in InOutPrivs constructor + * is not too verbose + * + * @param grantList + * @return grantList + */ + private static SQLPrivTypeGrant[] arr(SQLPrivTypeGrant... grantList) { + return grantList; + } + + public static SQLPrivTypeGrant[] getInputPrivs(HiveOperationType opType) { + return op2Priv.get(opType).getInputPrivs(); + } + + public static SQLPrivTypeGrant[] getOutputPrivs(HiveOperationType opType) { + return op2Priv.get(opType).getOutputPrivs(); + } + + // for unit tests + public static Set getOperationTypes() { + return op2Priv.keySet(); + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/RequiredPrivileges.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/RequiredPrivileges.java new file mode 100644 index 0000000..ee06335 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/RequiredPrivileges.java @@ -0,0 +1,112 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd; + +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; + +/** + * Captures privilege sets, and can be used to compare required and available privileges + * to find missing privileges (if any). 
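+ * For example, if SELECT with grant option is required but only SELECT without grant option is available, SELECT with grant option is reported as missing.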
+ */ +public class RequiredPrivileges { + + private final Set privilegeGrantSet = new HashSet(); + + public void addPrivilege(String priv, boolean withGrant) throws HiveAuthzPluginException { + SQLPrivTypeGrant privType = SQLPrivTypeGrant.getSQLPrivTypeGrant(priv, withGrant); + addPrivilege(privType); + privilegeGrantSet.add(privType); + if(withGrant){ + //as with grant also implies without grant privilege, add without privilege as well + addPrivilege(priv, false); + } + } + + public Set getRequiredPrivilegeSet() { + return privilegeGrantSet; + } + + /** + * Find the missing privileges in availPrivs + * + * @param availPrivs + * - available privileges + * @return missing privileges as RequiredPrivileges object + */ + public Collection findMissingPrivs(RequiredPrivileges availPrivs) { + MissingPrivilegeCapturer missingPrivCapturer = new MissingPrivilegeCapturer(); + for (SQLPrivTypeGrant requiredPriv : privilegeGrantSet) { + if (!availPrivs.privilegeGrantSet.contains(requiredPriv)) { + missingPrivCapturer.addMissingPrivilege(requiredPriv); + } + } + return missingPrivCapturer.getMissingPrivileges(); + } + + void addPrivilege(SQLPrivTypeGrant requiredPriv) { + privilegeGrantSet.add(requiredPriv); + } + + Set getPrivilegeWithGrants() { + return privilegeGrantSet; + } + + /** + * Capture privileges that are missing. If privilege "X with grant" and "X without grant" + * are reported missing, capture only "X with grant". This is useful for better error messages. + */ + class MissingPrivilegeCapturer { + + private final Map priv2privWithGrant = new HashMap(); + + void addMissingPrivilege(SQLPrivTypeGrant newPrivWGrant) { + SQLPrivTypeGrant matchingPrivWGrant = priv2privWithGrant.get(newPrivWGrant.getPrivType()); + if (matchingPrivWGrant != null) { + if (matchingPrivWGrant.isWithGrant() || !newPrivWGrant.isWithGrant()) { + // the existing entry already has grant, or new priv does not have + // grant + // no update needs to be done. 
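+ // for example, a missing SELECT without grant is dropped when missing SELECT with grant is already captured for the same privilege type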
+ return; + } + } + // add the new entry + priv2privWithGrant.put(newPrivWGrant.getPrivType(), newPrivWGrant); + } + + Collection getMissingPrivileges() { + return priv2privWithGrant.values(); + } + + } + + public void addAll(SQLPrivTypeGrant[] inputPrivs) { + if (inputPrivs == null) { + return; + } + for (SQLPrivTypeGrant privType : inputPrivs) { + addPrivilege(privType); + } + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/RevokePrivAuthUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/RevokePrivAuthUtils.java new file mode 100644 index 0000000..7177f7b --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/RevokePrivAuthUtils.java @@ -0,0 +1,84 @@ +package org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo; +import org.apache.hadoop.hive.ql.security.authorization.AuthorizationUtils; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrincipal; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilege; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; +import org.apache.thrift.TException; + +public class RevokePrivAuthUtils { + + public static List authorizeAndGetRevokePrivileges(List principals, + List hivePrivileges, HivePrivilegeObject hivePrivObject, boolean grantOption, + IMetaStoreClient mClient, String userName) + throws HiveAuthzPluginException, HiveAccessControlException { + + List matchingPrivs = new ArrayList(); + + StringBuilder errMsg = new StringBuilder(); + for (HivePrincipal principal : principals) { + + // get metastore/thrift privilege object for this principal and object, not looking at + // privileges obtained indirectly via roles + List msObjPrivs; + try { + msObjPrivs = mClient.list_privileges(principal.getName(), + AuthorizationUtils.getThriftPrincipalType(principal.getType()), + SQLAuthorizationUtils.getThriftHiveObjectRef(hivePrivObject)); + } catch (MetaException e) { + throw new HiveAuthzPluginException(e); + } catch (TException e) { + throw new HiveAuthzPluginException(e); + } + + // the resulting privileges need to be filtered on privilege type and + // username + + // create a Map to capture object privileges corresponding to privilege + // type + Map priv2privObj = new HashMap(); + + for (HiveObjectPrivilege msObjPriv : msObjPrivs) { + PrivilegeGrantInfo grantInfo = msObjPriv.getGrantInfo(); + // check if the grantor matches current user + if (grantInfo.getGrantor() != null && grantInfo.getGrantor().equals(userName) + && grantInfo.getGrantorType() == PrincipalType.USER) { + // add to the map + priv2privObj.put(grantInfo.getPrivilege(), msObjPriv); + } + // else skip this one + } + + // find the privileges that we are looking for + for (HivePrivilege hivePrivilege : hivePrivileges) { + HiveObjectPrivilege matchedPriv = priv2privObj.get(hivePrivilege.getName()); + 
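+ // a match means the current user directly granted this privilege type to the principal, so it is eligible for revocation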
if (matchedPriv != null) { + matchingPrivs.add(matchedPriv); + } else { + errMsg.append("Cannot find privilege ").append(hivePrivilege).append(" for ") + .append(principal).append(" on ").append(hivePrivObject).append(" granted by ") + .append(userName).append(System.getProperty("line.separator")); + } + } + + } + + if (errMsg.length() != 0) { + throw new HiveAccessControlException(errMsg.toString()); + } + return matchingPrivs; + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java new file mode 100644 index 0000000..942b11a --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java @@ -0,0 +1,285 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; + +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege; +import org.apache.hadoop.hive.metastore.api.HiveObjectRef; +import org.apache.hadoop.hive.metastore.api.HiveObjectType; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet; +import org.apache.hadoop.hive.metastore.api.PrivilegeBag; +import org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.security.authorization.AuthorizationUtils; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrincipal; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilege; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType; +import org.apache.thrift.TException; + +public class SQLAuthorizationUtils { + + private static final String[] SUPPORTED_PRIVS = { "INSERT", "UPDATE", "DELETE", "SELECT" }; + private static final Set SUPPORTED_PRIVS_SET = new HashSet( + Arrays.asList(SUPPORTED_PRIVS)); + + /** + * Create 
thrift privileges bag + * + * @param hivePrincipals + * @param hivePrivileges + * @param hivePrivObject + * @param grantorPrincipal + * @param grantOption + * @return + * @throws HiveAuthzPluginException + */ + static PrivilegeBag getThriftPrivilegesBag(List hivePrincipals, + List hivePrivileges, HivePrivilegeObject hivePrivObject, + HivePrincipal grantorPrincipal, boolean grantOption) throws HiveAuthzPluginException { + HiveObjectRef privObj = getThriftHiveObjectRef(hivePrivObject); + PrivilegeBag privBag = new PrivilegeBag(); + for (HivePrivilege privilege : hivePrivileges) { + if (privilege.getColumns() != null && privilege.getColumns().size() > 0) { + throw new HiveAuthzPluginException("Privileges on columns not supported currently" + + " in sql standard authorization mode"); + } + if (!SUPPORTED_PRIVS_SET.contains(privilege.getName().toUpperCase(Locale.US))) { + throw new HiveAuthzPluginException("Privilege: " + privilege.getName() + + " is not supported in sql standard authorization mode"); + } + PrivilegeGrantInfo grantInfo = getThriftPrivilegeGrantInfo(privilege, grantorPrincipal, + grantOption); + for (HivePrincipal principal : hivePrincipals) { + HiveObjectPrivilege objPriv = new HiveObjectPrivilege(privObj, principal.getName(), + AuthorizationUtils.getThriftPrincipalType(principal.getType()), grantInfo); + privBag.addToPrivileges(objPriv); + } + } + return privBag; + } + + static PrivilegeGrantInfo getThriftPrivilegeGrantInfo(HivePrivilege privilege, + HivePrincipal grantorPrincipal, boolean grantOption) throws HiveAuthzPluginException { + try { + return AuthorizationUtils.getThriftPrivilegeGrantInfo(privilege, grantorPrincipal, + grantOption); + } catch (HiveException e) { + throw new HiveAuthzPluginException(e); + } + } + + /** + * Create a thrift privilege object from the plugin interface privilege object + * + * @param privObj + * @return + * @throws HiveAuthzPluginException + */ + static HiveObjectRef getThriftHiveObjectRef(HivePrivilegeObject privObj) + throws HiveAuthzPluginException { + try { + return AuthorizationUtils.getThriftHiveObjectRef(privObj); + } catch (HiveException e) { + throw new HiveAuthzPluginException(e); + } + } + + static HivePrivilegeObjectType getPluginObjType(HiveObjectType objectType) + throws HiveAuthzPluginException { + switch (objectType) { + case DATABASE: + return HivePrivilegeObjectType.DATABASE; + case TABLE: + return HivePrivilegeObjectType.TABLE; + case COLUMN: + case GLOBAL: + case PARTITION: + throw new HiveAuthzPluginException("Unsupported object type " + objectType); + default: + throw new AssertionError("Unexpected object type " + objectType); + } + } + + /** + * Check if the privileges are acceptable for SQL Standard authorization implementation + * @param hivePrivileges + * @throws HiveAuthzPluginException + */ + public static void validatePrivileges(List hivePrivileges) throws HiveAuthzPluginException { + for (HivePrivilege hivePrivilege : hivePrivileges) { + if (hivePrivilege.getColumns() != null && hivePrivilege.getColumns().size() != 0) { + throw new HiveAuthzPluginException( + "Privilege with columns are not currently supported with sql standard authorization:" + + hivePrivilege); + } + //try converting to the enum to verify that this is a valid privilege type + SQLPrivilegeType.getRequirePrivilege(hivePrivilege.getName()); + + } + } + + /** + * Get the privileges this user(userName argument) has on the object + * (hivePrivObject argument) + * + * @param metastoreClient + * @param userName + * @param hivePrivObject + * @return 
+ * @throws HiveAuthzPluginException + */ + static RequiredPrivileges getPrivilegesFromMetaStore(IMetaStoreClient metastoreClient, + String userName, HivePrivilegeObject hivePrivObject) throws HiveAuthzPluginException { + + // get privileges for this user and its role on this object + PrincipalPrivilegeSet thrifPrivs = null; + try { + thrifPrivs = metastoreClient.get_privilege_set( + AuthorizationUtils.getThriftHiveObjectRef(hivePrivObject), userName, null); + } catch (MetaException e) { + throwGetPrivErr(e, hivePrivObject, userName); + } catch (TException e) { + throwGetPrivErr(e, hivePrivObject, userName); + } catch (HiveException e) { + throwGetPrivErr(e, hivePrivObject, userName); + } + + // convert to RequiredPrivileges + RequiredPrivileges privs = getRequiredPrivsFromThrift(thrifPrivs); + + // add owner privilege if user is owner of the object + if (isOwner(metastoreClient, userName, hivePrivObject)) { + privs.addPrivilege(SQLPrivTypeGrant.OWNER_PRIV); + } + + return privs; + } + + /** + * Check if user is owner of the given object + * + * @param metastoreClient + * @param userName + * user + * @param hivePrivObject + * given object + * @return true if user is owner + * @throws HiveAuthzPluginException + */ + private static boolean isOwner(IMetaStoreClient metastoreClient, String userName, + HivePrivilegeObject hivePrivObject) throws HiveAuthzPluginException { + //for now, check only table + if(hivePrivObject.getType() == HivePrivilegeObjectType.TABLE){ + Table thriftTableObj = null; + try { + thriftTableObj = metastoreClient.getTable(hivePrivObject.getDbname(), hivePrivObject.getTableviewname()); + } catch (MetaException e) { + throwGetTableErr(e, hivePrivObject); + } catch (NoSuchObjectException e) { + throwGetTableErr(e, hivePrivObject); + } catch (TException e) { + throwGetTableErr(e, hivePrivObject); + } + return userName.equals(thriftTableObj.getOwner()); + } + return false; + } + + private static void throwGetTableErr(Exception e, HivePrivilegeObject hivePrivObject) + throws HiveAuthzPluginException { + String msg = "Error getting table object from metastore for" + hivePrivObject; + throw new HiveAuthzPluginException(msg, e); + } + + private static void throwGetPrivErr(Exception e, HivePrivilegeObject hivePrivObject, + String userName) throws HiveAuthzPluginException { + String msg = "Error getting privileges on " + hivePrivObject + " for " + userName; + throw new HiveAuthzPluginException(msg, e); + } + + private static RequiredPrivileges getRequiredPrivsFromThrift(PrincipalPrivilegeSet thrifPrivs) + throws HiveAuthzPluginException { + + RequiredPrivileges reqPrivs = new RequiredPrivileges(); + // add user privileges + Map> userPrivs = thrifPrivs.getUserPrivileges(); + if (userPrivs != null && userPrivs.size() != 1) { + throw new HiveAuthzPluginException("Invalid number of user privilege objects: " + + userPrivs.size()); + } + addRequiredPrivs(reqPrivs, userPrivs); + + // add role privileges + Map> rolePrivs = thrifPrivs.getRolePrivileges(); + addRequiredPrivs(reqPrivs, rolePrivs); + return reqPrivs; + } + + /** + * Add privileges to RequiredPrivileges object reqPrivs from thrift availPrivs + * object + * @param reqPrivs + * @param availPrivs + * @throws HiveAuthzPluginException + */ + private static void addRequiredPrivs(RequiredPrivileges reqPrivs, + Map> availPrivs) throws HiveAuthzPluginException { + if(availPrivs == null){ + return; + } + for (Map.Entry> userPriv : availPrivs.entrySet()) { + List userPrivGInfos = userPriv.getValue(); + for (PrivilegeGrantInfo 
userPrivGInfo : userPrivGInfos) { + reqPrivs.addPrivilege(userPrivGInfo.getPrivilege(), userPrivGInfo.isGrantOption()); + } + } + } + + public static void assertNoMissingPrivilege(Collection missingPrivs, + HivePrincipal hivePrincipal, HivePrivilegeObject hivePrivObject) + throws HiveAccessControlException { + if (missingPrivs.size() != 0) { + // there are some required privileges missing, create error message + // sort the privileges so that error message is deterministic (for tests) + List sortedmissingPrivs = new ArrayList(missingPrivs); + Collections.sort(sortedmissingPrivs); + + String errMsg = "Permission denied. " + hivePrincipal + + " does not have following privileges on " + hivePrivObject + " : " + sortedmissingPrivs; + throw new HiveAccessControlException(errMsg.toString()); + } + } + + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLPrivTypeGrant.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLPrivTypeGrant.java new file mode 100644 index 0000000..95520f0 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLPrivTypeGrant.java @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd; + +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; + + +public enum SQLPrivTypeGrant { + SELECT_NOGRANT(SQLPrivilegeType.SELECT, false), + SELECT_WGRANT(SQLPrivilegeType.SELECT, true), + INSERT_NOGRANT(SQLPrivilegeType.INSERT, false), + INSERT_WGRANT(SQLPrivilegeType.INSERT, true), + UPDATE_NOGRANT(SQLPrivilegeType.UPDATE, false), + UPDATE_WGRANT(SQLPrivilegeType.UPDATE, true), + DELETE_NOGRANT(SQLPrivilegeType.DELETE, false), + DELETE_WGRANT(SQLPrivilegeType.DELETE, true), + OWNER_PRIV("Object ownership"), + ADMIN_PRIV("Admin privilege"); // This one can be used to deny permission for performing the operation + + private final SQLPrivilegeType privType; + private final boolean withGrant; + + private final String privDesc; + SQLPrivTypeGrant(SQLPrivilegeType privType, boolean isGrant){ + this.privType = privType; + this.withGrant = isGrant; + this.privDesc = privType.toString() + (withGrant ? 
" with grant" : ""); + } + + /** + * Constructor for privileges that are not the standard sql types, but are used by + * authorization rules + * @param privDesc + */ + SQLPrivTypeGrant(String privDesc){ + this.privDesc = privDesc; + this.privType = null; + this.withGrant = false; + } + + /** + * Find matching enum + * @param privType + * @param isGrant + * @return + */ + public static SQLPrivTypeGrant getSQLPrivTypeGrant( + SQLPrivilegeType privType, boolean isGrant) { + String typeName = privType.name() + (isGrant ? "_WGRANT" : "_NOGRANT"); + return SQLPrivTypeGrant.valueOf(typeName); + } + + /** + * Find matching enum + * + * @param privTypeStr + * privilege type string + * @param isGrant + * @return + * @throws HiveAuthzPluginException + */ + public static SQLPrivTypeGrant getSQLPrivTypeGrant(String privTypeStr, boolean isGrant) + throws HiveAuthzPluginException { + SQLPrivilegeType ptype = SQLPrivilegeType.getRequirePrivilege(privTypeStr); + return getSQLPrivTypeGrant(ptype, isGrant); + } + + public SQLPrivilegeType getPrivType() { + return privType; + } + + public boolean isWithGrant() { + return withGrant; + } + + /** + * @return String representation for use in error messages + */ + @Override + public String toString(){ + return privDesc; + } + +}; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLPrivilegeType.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLPrivilegeType.java new file mode 100644 index 0000000..bcd66d8 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLPrivilegeType.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd; + +import java.util.Locale; + +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; + +public enum SQLPrivilegeType { + //ALL privilege is expanded to these, so it is not needed here + SELECT, INSERT, UPDATE, DELETE; + + public static SQLPrivilegeType getRequirePrivilege(String priv) + throws HiveAuthzPluginException { + SQLPrivilegeType reqPriv; + if(priv == null){ + throw new HiveAuthzPluginException("Null privilege obtained"); + } + try { + reqPriv = SQLPrivilegeType.valueOf(priv.toUpperCase(Locale.US)); + } catch (IllegalArgumentException e) { + throw new HiveAuthzPluginException("Unsupported privilege type " + priv, e); + } + return reqPriv; + } + + +}; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAccessController.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAccessController.java index 5c5d0e5..c4e8801 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAccessController.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAccessController.java @@ -18,26 +18,28 @@ package org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashSet; import java.util.List; -import java.util.Locale; import java.util.Set; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStore; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege; import org.apache.hadoop.hive.metastore.api.HiveObjectRef; import org.apache.hadoop.hive.metastore.api.HiveObjectType; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.PrincipalType; import org.apache.hadoop.hive.metastore.api.PrivilegeBag; import org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo; import org.apache.hadoop.hive.metastore.api.Role; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider; import org.apache.hadoop.hive.ql.security.authorization.AuthorizationUtils; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessController; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizationPluginException; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveMetastoreClientFactory; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrincipal; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilege; @@ -46,66 +48,132 @@ import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveRole; - /** - * Implements functionality of access control statements for sql standard based authorization + * Implements functionality of access control statements for sql standard based + * authorization */ @Private public class SQLStdHiveAccessController implements HiveAccessController { - private HiveMetastoreClientFactory 
metastoreClientFactory; - private static final String [] SUPPORTED_PRIVS = {"INSERT", "UPDATE", "DELETE", "SELECT", "ALL"}; - private static final Set SUPPORTED_PRIVS_SET - = new HashSet(Arrays.asList(SUPPORTED_PRIVS)); - + private final HiveMetastoreClientFactory metastoreClientFactory; + private final HiveConf conf; + private final HiveAuthenticationProvider authenticator; + private String currentUserName; + private List currentRoles; + private HiveRole adminRole; - SQLStdHiveAccessController(HiveMetastoreClientFactory metastoreClientFactory, - HiveConf conf, String hiveCurrentUser){ + SQLStdHiveAccessController(HiveMetastoreClientFactory metastoreClientFactory, HiveConf conf, + HiveAuthenticationProvider authenticator) throws HiveAuthzPluginException { this.metastoreClientFactory = metastoreClientFactory; + this.conf = conf; + this.authenticator = authenticator; + initUserRoles(); + } + + /** + * (Re-)initialize currentRoleNames if necessary. + * @throws HiveAuthzPluginException + */ + private void initUserRoles() throws HiveAuthzPluginException { + //to aid in testing through .q files, authenticator is passed as argument to + // the interface. this helps in being able to switch the user within a session. + // so we need to check if the user has changed + String newUserName = authenticator.getUserName(); + if(currentUserName == newUserName){ + //no need to (re-)initialize the currentUserName, currentRoles fields + return; + } + this.currentUserName = newUserName; + this.currentRoles = getRolesFromMS(); } + private List getRolesFromMS() throws HiveAuthzPluginException { + List roles; + try { + roles = metastoreClientFactory.getHiveMetastoreClient(). + list_roles(currentUserName, PrincipalType.USER); + List currentRoles = new ArrayList(roles.size()); + for (Role role : roles) { + if (!HiveMetaStore.ADMIN.equalsIgnoreCase(role.getRoleName())) { + currentRoles.add(new HiveRole(role)); + } else { + this.adminRole = new HiveRole(role); + } + } + return currentRoles; + } catch (Exception e) { + throw new HiveAuthzPluginException("Failed to retrieve roles for "+ + currentUserName, e); + } + } @Override public void grantPrivileges(List hivePrincipals, List hivePrivileges, HivePrivilegeObject hivePrivObject, - HivePrincipal grantorPrincipal, boolean grantOption) throws HiveAuthorizationPluginException { + HivePrincipal grantorPrincipal, boolean grantOption) + throws HiveAuthzPluginException, HiveAccessControlException { - PrivilegeBag privBag = - getThriftPrivilegesBag(hivePrincipals, hivePrivileges, hivePrivObject, grantorPrincipal, - grantOption); + // expand ALL privileges, if any + hivePrivileges = expandAllPrivileges(hivePrivileges); + + SQLAuthorizationUtils.validatePrivileges(hivePrivileges); + + IMetaStoreClient metastoreClient = metastoreClientFactory.getHiveMetastoreClient(); + // authorize the grant + GrantPrivAuthUtils.authorize(hivePrincipals, hivePrivileges, hivePrivObject, grantOption, + metastoreClient, authenticator.getUserName()); + + // grant + PrivilegeBag privBag = getThriftPrivilegesBag(hivePrincipals, hivePrivileges, hivePrivObject, + grantorPrincipal, grantOption); try { - metastoreClientFactory.getHiveMetastoreClient().grant_privileges(privBag); + metastoreClient.grant_privileges(privBag); } catch (Exception e) { - throw new HiveAuthorizationPluginException("Error granting privileges", e); + throw new HiveAuthzPluginException("Error granting privileges", e); } } + private List expandAllPrivileges(List hivePrivileges) { + Set hivePrivSet = new HashSet(); + for 
(HivePrivilege hivePrivilege : hivePrivileges) { + if (hivePrivilege.getName().equals("ALL")) { + // expand to all supported privileges + for (SQLPrivilegeType privType : SQLPrivilegeType.values()) { + hivePrivSet.add(new HivePrivilege(privType.name(), hivePrivilege.getColumns())); + } + } else { + hivePrivSet.add(hivePrivilege); + } + } + return new ArrayList(hivePrivSet); + } + /** * Create thrift privileges bag + * * @param hivePrincipals * @param hivePrivileges * @param hivePrivObject * @param grantorPrincipal * @param grantOption * @return - * @throws HiveAuthorizationPluginException + * @throws HiveAuthzPluginException */ private PrivilegeBag getThriftPrivilegesBag(List hivePrincipals, List hivePrivileges, HivePrivilegeObject hivePrivObject, - HivePrincipal grantorPrincipal, boolean grantOption) throws HiveAuthorizationPluginException { - HiveObjectRef privObj = getThriftHiveObjectRef(hivePrivObject); + HivePrincipal grantorPrincipal, boolean grantOption) throws HiveAuthzPluginException { + + HiveObjectRef privObj = SQLAuthorizationUtils.getThriftHiveObjectRef(hivePrivObject); PrivilegeBag privBag = new PrivilegeBag(); - for(HivePrivilege privilege : hivePrivileges){ - if(privilege.getColumns() != null && privilege.getColumns().size() > 0){ - throw new HiveAuthorizationPluginException("Privileges on columns not supported currently" + for (HivePrivilege privilege : hivePrivileges) { + if (privilege.getColumns() != null && privilege.getColumns().size() > 0) { + throw new HiveAuthzPluginException("Privileges on columns not supported currently" + " in sql standard authorization mode"); } - if(!SUPPORTED_PRIVS_SET.contains(privilege.getName().toUpperCase(Locale.US))){ - throw new HiveAuthorizationPluginException("Privilege: " + privilege.getName() + - " is not supported in sql standard authorization mode"); - } - PrivilegeGrantInfo grantInfo = getThriftPrivilegeGrantInfo(privilege, grantorPrincipal, grantOption); - for(HivePrincipal principal : hivePrincipals){ + + PrivilegeGrantInfo grantInfo = getThriftPrivilegeGrantInfo(privilege, grantorPrincipal, + grantOption); + for (HivePrincipal principal : hivePrincipals) { HiveObjectPrivilege objPriv = new HiveObjectPrivilege(privObj, principal.getName(), AuthorizationUtils.getThriftPrincipalType(principal.getType()), grantInfo); privBag.addToPrivileges(objPriv); @@ -115,102 +183,95 @@ private PrivilegeBag getThriftPrivilegesBag(List hivePrincipals, } private PrivilegeGrantInfo getThriftPrivilegeGrantInfo(HivePrivilege privilege, - HivePrincipal grantorPrincipal, boolean grantOption) throws HiveAuthorizationPluginException { - try { - return AuthorizationUtils.getThriftPrivilegeGrantInfo(privilege, grantorPrincipal, grantOption); - } catch (HiveException e) { - throw new HiveAuthorizationPluginException(e); - } - } - - /** - * Create a thrift privilege object from the plugin interface privilege object - * @param privObj - * @return - * @throws HiveAuthorizationPluginException - */ - private HiveObjectRef getThriftHiveObjectRef(HivePrivilegeObject privObj) - throws HiveAuthorizationPluginException { + HivePrincipal grantorPrincipal, boolean grantOption) throws HiveAuthzPluginException { try { - return AuthorizationUtils.getThriftHiveObjectRef(privObj); + return AuthorizationUtils.getThriftPrivilegeGrantInfo(privilege, grantorPrincipal, + grantOption); } catch (HiveException e) { - throw new HiveAuthorizationPluginException(e); + throw new HiveAuthzPluginException(e); } } @Override public void revokePrivileges(List hivePrincipals, List 
hivePrivileges, HivePrivilegeObject hivePrivObject, - HivePrincipal grantorPrincipal, boolean grantOption) throws HiveAuthorizationPluginException { + HivePrincipal grantorPrincipal, boolean grantOption) + throws HiveAuthzPluginException, HiveAccessControlException { + SQLAuthorizationUtils.validatePrivileges(hivePrivileges); + + IMetaStoreClient metastoreClient = metastoreClientFactory.getHiveMetastoreClient(); + // authorize the revoke, and get the set of privileges to be revoked + List revokePrivs = RevokePrivAuthUtils + .authorizeAndGetRevokePrivileges(hivePrincipals, hivePrivileges, hivePrivObject, + grantOption, metastoreClient, authenticator.getUserName()); - PrivilegeBag privBag = - getThriftPrivilegesBag(hivePrincipals, hivePrivileges, hivePrivObject, grantorPrincipal, - grantOption); try { - metastoreClientFactory.getHiveMetastoreClient().revoke_privileges(privBag); + // unfortunately, the metastore api revokes all privileges that match on + // principal, privilege object type it does not filter on the grator + // username. + // So this will revoke privileges that are granted by other users.This is + // not SQL compliant behavior. Need to change/add a metastore api + // that has desired behavior. + metastoreClient.revoke_privileges(new PrivilegeBag(revokePrivs)); } catch (Exception e) { - throw new HiveAuthorizationPluginException("Error revoking privileges", e); + throw new HiveAuthzPluginException("Error revoking privileges", e); } } @Override public void createRole(String roleName, HivePrincipal adminGrantor) - throws HiveAuthorizationPluginException { + throws HiveAuthzPluginException { try { String grantorName = adminGrantor == null ? null : adminGrantor.getName(); - metastoreClientFactory.getHiveMetastoreClient() - .create_role(new Role(roleName, 0, grantorName)); + metastoreClientFactory.getHiveMetastoreClient().create_role( + new Role(roleName, 0, grantorName)); } catch (Exception e) { - throw new HiveAuthorizationPluginException("Error create role", e); + throw new HiveAuthzPluginException("Error create role", e); } } @Override - public void dropRole(String roleName) throws HiveAuthorizationPluginException { + public void dropRole(String roleName) throws HiveAuthzPluginException { try { metastoreClientFactory.getHiveMetastoreClient().drop_role(roleName); } catch (Exception e) { - throw new HiveAuthorizationPluginException("Error dropping role", e); + throw new HiveAuthzPluginException("Error dropping role", e); } } @Override - public List getRoles(HivePrincipal hivePrincipal) throws HiveAuthorizationPluginException { + public List getRoles(HivePrincipal hivePrincipal) throws HiveAuthzPluginException { try { List roles = metastoreClientFactory.getHiveMetastoreClient().list_roles( hivePrincipal.getName(), AuthorizationUtils.getThriftPrincipalType(hivePrincipal.getType())); - List roleNames = new ArrayList(roles.size()); + List hiveRoles = new ArrayList(roles.size()); for (Role role : roles){ - ; - roleNames.add(new HiveRole(role)); + hiveRoles.add(new HiveRole(role)); } - return roleNames; + return hiveRoles; } catch (Exception e) { - throw new HiveAuthorizationPluginException( - "Error listing roles for user" + hivePrincipal.getName(), e); + throw new HiveAuthzPluginException("Error listing roles for user " + + hivePrincipal.getName(), e); } } @Override public void grantRole(List hivePrincipals, List roleNames, - boolean grantOption, HivePrincipal grantorPrinc) throws HiveAuthorizationPluginException { - for(HivePrincipal hivePrincipal : hivePrincipals){ - for(String 
roleName : roleNames){ + boolean grantOption, HivePrincipal grantorPrinc) throws HiveAuthzPluginException { + for (HivePrincipal hivePrincipal : hivePrincipals) { + for (String roleName : roleNames) { try { IMetaStoreClient mClient = metastoreClientFactory.getHiveMetastoreClient(); - mClient.grant_role(roleName, - hivePrincipal.getName(), + mClient.grant_role(roleName, hivePrincipal.getName(), AuthorizationUtils.getThriftPrincipalType(hivePrincipal.getType()), grantorPrinc.getName(), - AuthorizationUtils.getThriftPrincipalType(grantorPrinc.getType()), - grantOption - ); + AuthorizationUtils.getThriftPrincipalType(grantorPrinc.getType()), grantOption); } catch (MetaException e) { - throw new HiveAuthorizationPluginException(e.getMessage(), e); + throw new HiveAuthzPluginException(e.getMessage(), e); } catch (Exception e) { - String msg = "Error granting roles for " + hivePrincipal.getName() + " to role " + String msg = "Error granting roles for " + hivePrincipal.getName() + " to role " + roleName + ": " + e.getMessage(); - throw new HiveAuthorizationPluginException(msg, e); + throw new HiveAuthzPluginException(msg, e); } } } @@ -218,77 +279,67 @@ public void grantRole(List hivePrincipals, List roleNames @Override public void revokeRole(List hivePrincipals, List roleNames, - boolean grantOption, HivePrincipal grantorPrinc) throws HiveAuthorizationPluginException { - if(grantOption){ - //removing grant privileges only is not supported in metastore api - throw new HiveAuthorizationPluginException("Revoking only the admin privileges on " + boolean grantOption, HivePrincipal grantorPrinc) throws HiveAuthzPluginException { + if (grantOption) { + // removing grant privileges only is not supported in metastore api + throw new HiveAuthzPluginException("Revoking only the admin privileges on " + "role is not currently supported"); } - for(HivePrincipal hivePrincipal : hivePrincipals){ - for(String roleName : roleNames){ + for (HivePrincipal hivePrincipal : hivePrincipals) { + for (String roleName : roleNames) { try { IMetaStoreClient mClient = metastoreClientFactory.getHiveMetastoreClient(); - mClient.revoke_role(roleName, - hivePrincipal.getName(), - AuthorizationUtils.getThriftPrincipalType(hivePrincipal.getType()) - ); - } catch (Exception e) { - String msg = "Error revoking roles for " + hivePrincipal.getName() + " to role " + roleName - + hivePrincipal.getName(); - throw new HiveAuthorizationPluginException(msg, e); + mClient.revoke_role(roleName, hivePrincipal.getName(), + AuthorizationUtils.getThriftPrincipalType(hivePrincipal.getType())); + } catch (Exception e) { + String msg = "Error revoking roles for " + hivePrincipal.getName() + " to role " + + roleName; + throw new HiveAuthzPluginException(msg, e); } } } } @Override - public List getAllRoles() throws HiveAuthorizationPluginException { + public List getAllRoles() throws HiveAuthzPluginException { try { return metastoreClientFactory.getHiveMetastoreClient().listRoleNames(); } catch (Exception e) { - throw new HiveAuthorizationPluginException("Error listing all roles", e); + throw new HiveAuthzPluginException("Error listing all roles", e); } } - @Override public List showPrivileges(HivePrincipal principal, HivePrivilegeObject privObj) - throws HiveAuthorizationPluginException { + throws HiveAuthzPluginException { try { - - List resPrivInfos = new ArrayList(); IMetaStoreClient mClient = metastoreClientFactory.getHiveMetastoreClient(); + List resPrivInfos = new ArrayList(); + // get metastore/thrift privilege object using metastore api + List 
msObjPrivs = mClient.list_privileges(principal.getName(), + AuthorizationUtils.getThriftPrincipalType(principal.getType()), + SQLAuthorizationUtils.getThriftHiveObjectRef(privObj)); - //get metastore/thrift privilege object using metastore api - List msObjPrivs - = mClient.list_privileges(principal.getName(), - AuthorizationUtils.getThriftPrincipalType(principal.getType()), - getThriftHiveObjectRef(privObj)); - //convert the metastore thrift objects to result objects - for(HiveObjectPrivilege msObjPriv : msObjPrivs){ - //result principal - HivePrincipal resPrincipal = - new HivePrincipal(msObjPriv.getPrincipalName(), - AuthorizationUtils.getHivePrincipalType(msObjPriv.getPrincipalType())); + // convert the metastore thrift objects to result objects + for (HiveObjectPrivilege msObjPriv : msObjPrivs) { + // result principal + HivePrincipal resPrincipal = new HivePrincipal(msObjPriv.getPrincipalName(), + AuthorizationUtils.getHivePrincipalType(msObjPriv.getPrincipalType())); - //result privilege + // result privilege PrivilegeGrantInfo msGrantInfo = msObjPriv.getGrantInfo(); HivePrivilege resPrivilege = new HivePrivilege(msGrantInfo.getPrivilege(), null); - //result object + // result object HiveObjectRef msObjRef = msObjPriv.getHiveObject(); HivePrivilegeObject resPrivObj = new HivePrivilegeObject( - getPluginObjType(msObjRef.getObjectType()), - msObjRef.getDbName(), - msObjRef.getObjectName() - ); - - //result grantor principal - HivePrincipal grantorPrincipal = - new HivePrincipal(msGrantInfo.getGrantor(), - AuthorizationUtils.getHivePrincipalType(msGrantInfo.getGrantorType())); + getPluginObjType(msObjRef.getObjectType()), msObjRef.getDbName(), + msObjRef.getObjectName()); + // result grantor principal + HivePrincipal grantorPrincipal = new HivePrincipal(msGrantInfo.getGrantor(), + AuthorizationUtils.getHivePrincipalType(msGrantInfo.getGrantorType())); HivePrivilegeInfo resPrivInfo = new HivePrivilegeInfo(resPrincipal, resPrivilege, resPrivObj, grantorPrincipal, msGrantInfo.isGrantOption()); @@ -296,17 +347,15 @@ public void revokeRole(List hivePrincipals, List roleName } return resPrivInfos; - } - catch (Exception e) { - throw new HiveAuthorizationPluginException("Error showing privileges", e); + } catch (Exception e) { + throw new HiveAuthzPluginException("Error showing privileges", e); } } - private HivePrivilegeObjectType getPluginObjType(HiveObjectType objectType) - throws HiveAuthorizationPluginException { - switch(objectType){ + throws HiveAuthzPluginException { + switch (objectType) { case DATABASE: return HivePrivilegeObjectType.DATABASE; case TABLE: @@ -314,10 +363,43 @@ private HivePrivilegeObjectType getPluginObjType(HiveObjectType objectType) case COLUMN: case GLOBAL: case PARTITION: - throw new HiveAuthorizationPluginException("Unsupported object type " + objectType); + throw new HiveAuthzPluginException("Unsupported object type " + objectType); default: throw new AssertionError("Unexpected object type " + objectType); } } + @Override + public void setCurrentRole(String roleName) throws HiveAuthzPluginException { + + if ("NONE".equalsIgnoreCase(roleName)) { + // for set role NONE, reset roles to default roles. + currentRoles.clear(); + currentRoles.addAll(getRolesFromMS()); + return; + } + for (HiveRole role : getRolesFromMS()) { + // set to one of the roles user belongs to. + if (role.getRoleName().equalsIgnoreCase(roleName)) { + currentRoles.clear(); + currentRoles.add(role); + return; + } + } + // set to ADMIN role, if user belongs there. 
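+ // adminRole is kept out of currentRoles by getRolesFromMS and only becomes active when requested explicitly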
+ if (HiveMetaStore.ADMIN.equalsIgnoreCase(roleName) && null != this.adminRole) { + currentRoles.clear(); + currentRoles.add(adminRole); + return; + } + // If we are here it means, user is requesting a role he doesn't belong to. + throw new HiveAuthzPluginException(currentUserName +" doesn't belong to role " + +roleName); + } + + @Override + public List getCurrentRoles() throws HiveAuthzPluginException { + initUserRoles(); + return currentRoles; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java index 3b2361c..ac50c00 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java @@ -17,18 +17,66 @@ */ package org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd; +import java.util.Collection; import java.util.List; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizationPluginException; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizationValidator; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveMetastoreClientFactory; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrincipal; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrincipal.HivePrincipalType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; public class SQLStdHiveAuthorizationValidator implements HiveAuthorizationValidator { + private final HiveMetastoreClientFactory metastoreClientFactory; + private final HiveConf conf; + private final HiveAuthenticationProvider authenticator; + + public SQLStdHiveAuthorizationValidator(HiveMetastoreClientFactory metastoreClientFactory, + HiveConf conf, HiveAuthenticationProvider authenticator) { + this.metastoreClientFactory = metastoreClientFactory; + this.conf = conf; + this.authenticator = authenticator; + } + @Override public void checkPrivileges(HiveOperationType hiveOpType, List inputHObjs, - List outputHObjs) throws HiveAuthorizationPluginException { + List outputHObjs) throws HiveAuthzPluginException, HiveAccessControlException { + String userName = authenticator.getUserName(); + IMetaStoreClient metastoreClient = metastoreClientFactory.getHiveMetastoreClient(); + + // get privileges required on input and check + SQLPrivTypeGrant[] inputPrivs = Operation2Privilege.getInputPrivs(hiveOpType); + checkPrivileges(inputPrivs, inputHObjs, metastoreClient, userName); + + // get privileges required on input and check + SQLPrivTypeGrant[] outputPrivs = Operation2Privilege.getOutputPrivs(hiveOpType); + checkPrivileges(outputPrivs, outputHObjs, metastoreClient, userName); + + } + + private void checkPrivileges(SQLPrivTypeGrant[] reqPrivs, + List hObjs, IMetaStoreClient metastoreClient, String userName) + throws HiveAuthzPluginException, 
HiveAccessControlException { + RequiredPrivileges requiredInpPrivs = new RequiredPrivileges(); + requiredInpPrivs.addAll(reqPrivs); + + // check if this user has these privileges on the objects + for (HivePrivilegeObject hObj : hObjs) { + // get the privileges that this user has on the object + RequiredPrivileges availPrivs = SQLAuthorizationUtils.getPrivilegesFromMetaStore( + metastoreClient, userName, hObj); + Collection missingPriv = requiredInpPrivs + .findMissingPrivs(availPrivs); + SQLAuthorizationUtils.assertNoMissingPrivilege(missingPriv, new HivePrincipal(userName, + HivePrincipalType.USER), hObj); + } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java index 7688bbf..5fc5c0f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java @@ -19,19 +19,22 @@ import org.apache.hadoop.hive.common.classification.InterfaceAudience.Private; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizer; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizerFactory; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizerImpl; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveMetastoreClientFactory; @Private public class SQLStdHiveAuthorizerFactory implements HiveAuthorizerFactory{ @Override public HiveAuthorizer createHiveAuthorizer(HiveMetastoreClientFactory metastoreClientFactory, - HiveConf conf, String hiveCurrentUser) { + HiveConf conf, HiveAuthenticationProvider authenticator) throws HiveAuthzPluginException { + return new HiveAuthorizerImpl( - new SQLStdHiveAccessController(metastoreClientFactory, conf, hiveCurrentUser), - new SQLStdHiveAuthorizationValidator() + new SQLStdHiveAccessController(metastoreClientFactory, conf, authenticator), + new SQLStdHiveAuthorizationValidator(metastoreClientFactory, conf, authenticator) ); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java index 64a8a60..845ff77 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java @@ -54,6 +54,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveUtils; import org.apache.hadoop.hive.ql.plan.HiveOperation; import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider; +import org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator; import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizer; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizerFactory; @@ -340,29 +341,39 @@ public static SessionState start(SessionState startSs) { */ private void setupAuth() { - if(authenticator != null){ - //auth has been initialized + if (authenticator != null) { + // auth has been initialized return; } try { - authenticator = HiveUtils.getAuthenticator( - 
getConf(),HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER); - authorizer = HiveUtils.getAuthorizeProviderManager( - getConf(), HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, - authenticator, true); - - if(authorizer == null){ - //if it was null, the new authorization plugin must be specified in config - HiveAuthorizerFactory authorizerFactory = - HiveUtils.getAuthorizerFactory(getConf(), HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER); - String authUser = userName == null ? authenticator.getUserName() : userName; - authorizerV2 = authorizerFactory.createHiveAuthorizer(new HiveMetastoreClientFactoryImpl(), - getConf(), authUser); + authenticator = HiveUtils.getAuthenticator(getConf(), + HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER); + + if (userName != null) { + // if username is set through the session, use an authenticator that + // just returns the sessionstate user + authenticator = new SessionStateUserAuthenticator(this); } - else{ - createTableGrants = CreateTableAutomaticGrant.create(getConf()); + authenticator.setSessionState(this); + + authorizer = HiveUtils.getAuthorizeProviderManager(getConf(), + HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, authenticator, true); + + if (authorizer == null) { + // if it was null, the new authorization plugin must be specified in + // config + HiveAuthorizerFactory authorizerFactory = HiveUtils.getAuthorizerFactory(getConf(), + HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER); + + authorizerV2 = authorizerFactory.createHiveAuthorizer(new HiveMetastoreClientFactoryImpl(), + getConf(), authenticator); + // grant all privileges for table to its owner + getConf().setVar(ConfVars.HIVE_AUTHORIZATION_TABLE_OWNER_GRANTS, "insert,select,update,delete"); } + + createTableGrants = CreateTableAutomaticGrant.create(getConf()); + } catch (HiveException e) { throw new RuntimeException(e); } @@ -594,16 +605,19 @@ public static boolean unregisterJar(String jarsToUnregister) { */ public static enum ResourceType { FILE(new ResourceHook() { + @Override public String preHook(Set cur, String s) { return validateFile(cur, s); } + @Override public boolean postHook(Set cur, String s) { return true; } }), JAR(new ResourceHook() { + @Override public String preHook(Set cur, String s) { String newJar = validateFile(cur, s); if (newJar != null) { @@ -613,16 +627,19 @@ public String preHook(Set cur, String s) { } } + @Override public boolean postHook(Set cur, String s) { return unregisterJar(s); } }), ARCHIVE(new ResourceHook() { + @Override public String preHook(Set cur, String s) { return validateFile(cur, s); } + @Override public boolean postHook(Set cur, String s) { return true; } @@ -821,6 +838,7 @@ public void setAuthenticator(HiveAuthenticationProvider authenticator) { } public CreateTableAutomaticGrant getCreateTableGrants() { + setupAuth(); return createTableGrants; } @@ -947,4 +965,9 @@ public TezSessionState getTezSession() { public void setTezSession(TezSessionState session) { this.tezSessionState = session; } + + public String getUserName() { + return userName; + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDegrees.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDegrees.java index c8f3319..bc5e1e2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDegrees.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDegrees.java @@ -20,7 +20,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.Description; -import org.apache.hadoop.hive.ql.exec.UDF; import 
org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncDegreesDoubleToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncDegreesLongToDouble; @@ -34,7 +33,7 @@ " -1\n" ) @VectorizedExpressions({FuncDegreesLongToDouble.class, FuncDegreesDoubleToDouble.class}) -public class UDFDegrees extends UDF{ +public class UDFDegrees extends UDFMath { @SuppressWarnings("unused") private static Log LOG = LogFactory.getLog(UDFDegrees.class.getName()); diff --git a/ql/src/java/parquet/hive/DeprecatedParquetInputFormat.java b/ql/src/java/parquet/hive/DeprecatedParquetInputFormat.java new file mode 100644 index 0000000..ec0ebc0 --- /dev/null +++ b/ql/src/java/parquet/hive/DeprecatedParquetInputFormat.java @@ -0,0 +1,37 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package parquet.hive; + +import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat; +import org.apache.hadoop.io.ArrayWritable; + +import parquet.hadoop.ParquetInputFormat; + +/** + * Deprecated name of the parquet-hive input format. This class exists + * simply to provide backwards compatibility with users who specified + * this name in the Hive metastore. All users should now use + * STORED AS PARQUET + */ +@Deprecated +public class DeprecatedParquetInputFormat extends MapredParquetInputFormat { + + public DeprecatedParquetInputFormat() { + super(); + } + + public DeprecatedParquetInputFormat(final ParquetInputFormat realInputFormat) { + super(realInputFormat); + } +} diff --git a/ql/src/java/parquet/hive/DeprecatedParquetOutputFormat.java b/ql/src/java/parquet/hive/DeprecatedParquetOutputFormat.java new file mode 100644 index 0000000..a0bdd75 --- /dev/null +++ b/ql/src/java/parquet/hive/DeprecatedParquetOutputFormat.java @@ -0,0 +1,36 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package parquet.hive; + +import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.mapreduce.OutputFormat; + +/** + * Deprecated name of the parquet-hive output format. This class exists + * simply to provide backwards compatibility with users who specified + * this name in the Hive metastore. 
All users should now use + * STORED AS PARQUET + */ +@Deprecated +public class DeprecatedParquetOutputFormat extends MapredParquetOutputFormat { + + public DeprecatedParquetOutputFormat() { + super(); + } + + public DeprecatedParquetOutputFormat(final OutputFormat mapreduceOutputFormat) { + super(mapreduceOutputFormat); + } +} diff --git a/ql/src/java/parquet/hive/MapredParquetInputFormat.java b/ql/src/java/parquet/hive/MapredParquetInputFormat.java new file mode 100644 index 0000000..9b3d453 --- /dev/null +++ b/ql/src/java/parquet/hive/MapredParquetInputFormat.java @@ -0,0 +1,36 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package parquet.hive; + +import org.apache.hadoop.io.ArrayWritable; + +import parquet.hadoop.ParquetInputFormat; + +/** + * Deprecated name of the parquet-hive input format. This class exists + * simply to provide backwards compatibility with users who specified + * this name in the Hive metastore. All users should now use + * STORED AS PARQUET + */ +@Deprecated +public class MapredParquetInputFormat extends org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat { + + public MapredParquetInputFormat() { + super(); + } + + public MapredParquetInputFormat(final ParquetInputFormat realInputFormat) { + super(realInputFormat); + } +} diff --git a/ql/src/java/parquet/hive/MapredParquetOutputFormat.java b/ql/src/java/parquet/hive/MapredParquetOutputFormat.java new file mode 100644 index 0000000..dc6ea3e --- /dev/null +++ b/ql/src/java/parquet/hive/MapredParquetOutputFormat.java @@ -0,0 +1,35 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package parquet.hive; + +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.mapreduce.OutputFormat; + +/** + * Deprecated name of the parquet-hive output format. This class exists + * simply to provide backwards compatibility with users who specified + * this name in the Hive metastore. 
All users should now use + * STORED AS PARQUET + */ +@Deprecated +public class MapredParquetOutputFormat extends org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat { + + public MapredParquetOutputFormat() { + super(); + } + + public MapredParquetOutputFormat(final OutputFormat mapreduceOutputFormat) { + super(mapreduceOutputFormat); + } +} diff --git a/ql/src/java/parquet/hive/serde/ParquetHiveSerDe.java b/ql/src/java/parquet/hive/serde/ParquetHiveSerDe.java new file mode 100644 index 0000000..a1dcaa4 --- /dev/null +++ b/ql/src/java/parquet/hive/serde/ParquetHiveSerDe.java @@ -0,0 +1,25 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package parquet.hive.serde; + +/** + * Deprecated name of the parquet-hive SerDe. This class exists + * simply to provide backwards compatibility with users who specified + * this name in the Hive metastore. All users should now use + * STORED AS PARQUET + */ +@Deprecated +public class ParquetHiveSerDe extends org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe { + +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java new file mode 100644 index 0000000..0b25f6e --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java @@ -0,0 +1,114 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.hadoop.hive.ql.io.parquet; + +import static org.junit.Assert.assertEquals; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.junit.Test; + +import parquet.schema.MessageType; +import parquet.schema.MessageTypeParser; + +public class TestHiveSchemaConverter { + + private List<String> createHiveColumnsFrom(final String columnNamesStr) { + List<String> columnNames; + if (columnNamesStr.length() == 0) { + columnNames = new ArrayList<String>(); + } else { + columnNames = Arrays.asList(columnNamesStr.split(",")); + } + + return columnNames; + } + + private List<TypeInfo> createHiveTypeInfoFrom(final String columnsTypeStr) { + List<TypeInfo> columnTypes; + + if (columnsTypeStr.length() == 0) { + columnTypes = new ArrayList<TypeInfo>(); + } else { + columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnsTypeStr); + } + + return columnTypes; + } + + private void testConversion(final String columnNamesStr, final String columnsTypeStr, final String expectedSchema) throws Exception { + final List<String> columnNames = createHiveColumnsFrom(columnNamesStr); + final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(columnsTypeStr); + final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes); + final MessageType expectedMT = MessageTypeParser.parseMessageType(expectedSchema); + assertEquals("converting " + columnNamesStr + ": " + columnsTypeStr + " to " + expectedSchema, expectedMT, messageTypeFound); + } + + @Test + public void testSimpleType() throws Exception { + testConversion( + "a,b,c", + "int,double,boolean", + "message hive_schema {\n" + + " optional int32 a;\n" + + " optional double b;\n" + + " optional boolean c;\n" + + "}\n"); + } + + @Test + public void testArray() throws Exception { + testConversion("arrayCol", + "array<int>", + "message hive_schema {\n" + + " optional group arrayCol (LIST) {\n" + + " repeated group bag {\n" + + " optional int32 array_element;\n" + + " }\n" + + " }\n" + + "}\n"); + } + + @Test + public void testStruct() throws Exception { + testConversion("structCol", + "struct<a:int,b:double,c:boolean>", + "message hive_schema {\n" + + " optional group structCol {\n" + + " optional int32 a;\n" + + " optional double b;\n" + + " optional boolean c;\n" + + " }\n" + + "}\n"); + } + + @Test + public void testMap() throws Exception { + testConversion("mapCol", + "map<string,string>", + "message hive_schema {\n" + + " optional group mapCol (MAP) {\n" + + " repeated group map (MAP_KEY_VALUE) {\n" + + " required binary key;\n" + + " optional binary value;\n" + + " }\n" + + " }\n" + + "}\n"); + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestMapredParquetInputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestMapredParquetInputFormat.java new file mode 100644 index 0000000..1a54bf5 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestMapredParquetInputFormat.java @@ -0,0 +1,37 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet; + +import static org.mockito.Mockito.mock; + +import org.apache.hadoop.io.ArrayWritable; +import org.junit.Test; + +import parquet.hadoop.ParquetInputFormat; + +public class TestMapredParquetInputFormat { + @Test + public void testDefaultConstructor() { + new MapredParquetInputFormat(); + } + + @SuppressWarnings("unchecked") + @Test + public void testConstructorWithParquetInputFormat() { + new MapredParquetInputFormat( + (ParquetInputFormat) mock(ParquetInputFormat.class) + ); + } + +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestMapredParquetOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestMapredParquetOutputFormat.java new file mode 100644 index 0000000..417676d --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestMapredParquetOutputFormat.java @@ -0,0 +1,90 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.fail; +import static org.mockito.Mockito.mock; + +import java.io.IOException; +import java.util.Properties; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport; +import org.apache.hadoop.hive.ql.io.parquet.write.ParquetRecordWriterWrapper; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.util.Progressable; +import org.junit.Test; + +import parquet.hadoop.ParquetOutputFormat; + +public class TestMapredParquetOutputFormat { + + @Test + public void testConstructor() { + new MapredParquetOutputFormat(); + } + + @SuppressWarnings("unchecked") + @Test + public void testConstructorWithFormat() { + new MapredParquetOutputFormat((ParquetOutputFormat) mock(ParquetOutputFormat.class)); + } + + @Test + public void testGetRecordWriterThrowsException() { + try { + new MapredParquetOutputFormat().getRecordWriter(null, null, null, null); + fail("should throw runtime exception."); + } catch (Exception e) { + assertEquals("Should never be used", e.getMessage()); + } + } + + @SuppressWarnings("unchecked") + @Test + public void testGetHiveRecordWriter() throws IOException { + Properties tableProps = new Properties(); + tableProps.setProperty("columns", "foo,bar"); + tableProps.setProperty("columns.types", "int:int"); + + final Progressable mockProgress = mock(Progressable.class); + final ParquetOutputFormat outputFormat = (ParquetOutputFormat) mock(ParquetOutputFormat.class); + + JobConf jobConf = new JobConf(); + + try { + new MapredParquetOutputFormat(outputFormat) { + @Override + protected ParquetRecordWriterWrapper getParquerRecordWriterWrapper( + ParquetOutputFormat realOutputFormat, + JobConf jobConf, + String finalOutPath, + Progressable progress + ) throws IOException { 
+ assertEquals(outputFormat, realOutputFormat); + assertNotNull(jobConf.get(DataWritableWriteSupport.PARQUET_HIVE_SCHEMA)); + assertEquals("/foo", finalOutPath.toString()); + assertEquals(mockProgress, progress); + throw new RuntimeException("passed tests"); + } + }.getHiveRecordWriter(jobConf, new Path("/foo"), null, false, tableProps, mockProgress); + fail("should throw runtime exception."); + } catch (RuntimeException e) { + assertEquals("passed tests", e.getMessage()); + } + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java new file mode 100644 index 0000000..be518b9 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java @@ -0,0 +1,140 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet; + +import java.util.Properties; + +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; +import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Writable; + +import parquet.io.api.Binary; + +public class TestParquetSerDe extends TestCase { + + public void testParquetHiveSerDe() throws Throwable { + try { + // Create the SerDe + System.out.println("test: testParquetHiveSerDe"); + + final ParquetHiveSerDe serDe = new ParquetHiveSerDe(); + final Configuration conf = new Configuration(); + final Properties tbl = createProperties(); + serDe.initialize(conf, tbl); + + // Data + final Writable[] arr = new Writable[8]; + + arr[0] = new ByteWritable((byte) 123); + arr[1] = new ShortWritable((short) 456); + arr[2] = new IntWritable(789); + arr[3] = new LongWritable(1000l); + arr[4] = new DoubleWritable((double) 5.3); + arr[5] = new BinaryWritable(Binary.fromString("hive and hadoop and parquet. 
Big family.")); + + final Writable[] mapContainer = new Writable[1]; + final Writable[] map = new Writable[3]; + for (int i = 0; i < 3; ++i) { + final Writable[] pair = new Writable[2]; + pair[0] = new BinaryWritable(Binary.fromString("key_" + i)); + pair[1] = new IntWritable(i); + map[i] = new ArrayWritable(Writable.class, pair); + } + mapContainer[0] = new ArrayWritable(Writable.class, map); + arr[6] = new ArrayWritable(Writable.class, mapContainer); + + final Writable[] arrayContainer = new Writable[1]; + final Writable[] array = new Writable[5]; + for (int i = 0; i < 5; ++i) { + array[i] = new BinaryWritable(Binary.fromString("elem_" + i)); + } + arrayContainer[0] = new ArrayWritable(Writable.class, array); + arr[7] = new ArrayWritable(Writable.class, arrayContainer); + + final ArrayWritable arrWritable = new ArrayWritable(Writable.class, arr); + // Test + deserializeAndSerializeLazySimple(serDe, arrWritable); + System.out.println("test: testParquetHiveSerDe - OK"); + + } catch (final Throwable e) { + e.printStackTrace(); + throw e; + } + } + + private void deserializeAndSerializeLazySimple(final ParquetHiveSerDe serDe, final ArrayWritable t) throws SerDeException { + + // Get the row structure + final StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector(); + + // Deserialize + final Object row = serDe.deserialize(t); + assertEquals("deserialization gives the wrong object class", row.getClass(), ArrayWritable.class); + assertEquals("size correct after deserialization", serDe.getSerDeStats().getRawDataSize(), t.get().length); + assertEquals("deserialization gives the wrong object", t, row); + + // Serialize + final ArrayWritable serializedArr = (ArrayWritable) serDe.serialize(row, oi); + assertEquals("size correct after serialization", serDe.getSerDeStats().getRawDataSize(), serializedArr.get().length); + assertTrue("serialized object should be equal to starting object", arrayWritableEquals(t, serializedArr)); + } + + private Properties createProperties() { + final Properties tbl = new Properties(); + + // Set the configuration parameters + tbl.setProperty("columns", "abyte,ashort,aint,along,adouble,astring,amap,alist"); + tbl.setProperty("columns.types", "tinyint:smallint:int:bigint:double:string:map:array"); + tbl.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL"); + return tbl; + } + + public static boolean arrayWritableEquals(final ArrayWritable a1, final ArrayWritable a2) { + final Writable[] a1Arr = a1.get(); + final Writable[] a2Arr = a2.get(); + + if (a1Arr.length != a2Arr.length) { + return false; + } + + for (int i = 0; i < a1Arr.length; ++i) { + if (a1Arr[i] instanceof ArrayWritable) { + if (!(a2Arr[i] instanceof ArrayWritable)) { + return false; + } + if (!arrayWritableEquals((ArrayWritable) a1Arr[i], (ArrayWritable) a2Arr[i])) { + return false; + } + } else { + if (!a1Arr[i].equals(a2Arr[i])) { + return false; + } + } + + } + return true; + } + +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestAbstractParquetMapInspector.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestAbstractParquetMapInspector.java new file mode 100644 index 0000000..ef05150 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestAbstractParquetMapInspector.java @@ -0,0 +1,98 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.serde; + +import java.util.HashMap; +import java.util.Map; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Writable; +import org.junit.Test; + +public class TestAbstractParquetMapInspector extends TestCase { + + class TestableAbstractParquetMapInspector extends AbstractParquetMapInspector { + + public TestableAbstractParquetMapInspector(ObjectInspector keyInspector, ObjectInspector valueInspector) { + super(keyInspector, valueInspector); + } + + @Override + public Object getMapValueElement(Object o, Object o1) { + throw new UnsupportedOperationException("Should not be called"); + } + } + private TestableAbstractParquetMapInspector inspector; + + @Override + public void setUp() { + inspector = new TestableAbstractParquetMapInspector(PrimitiveObjectInspectorFactory.javaIntObjectInspector, + PrimitiveObjectInspectorFactory.javaIntObjectInspector); + } + + @Test + public void testNullMap() { + assertEquals("Wrong size", -1, inspector.getMapSize(null)); + assertNull("Should be null", inspector.getMap(null)); + } + + @Test + public void testNullContainer() { + final ArrayWritable map = new ArrayWritable(ArrayWritable.class, null); + assertEquals("Wrong size", -1, inspector.getMapSize(map)); + assertNull("Should be null", inspector.getMap(map)); + } + + @Test + public void testEmptyContainer() { + final ArrayWritable map = new ArrayWritable(ArrayWritable.class, new ArrayWritable[0]); + assertEquals("Wrong size", -1, inspector.getMapSize(map)); + assertNull("Should be null", inspector.getMap(map)); + } + + @Test + public void testRegularMap() { + final Writable[] entry1 = new Writable[]{new IntWritable(0), new IntWritable(1)}; + final Writable[] entry2 = new Writable[]{new IntWritable(2), new IntWritable(3)}; + + final ArrayWritable internalMap = new ArrayWritable(ArrayWritable.class, new Writable[]{ + new ArrayWritable(Writable.class, entry1), new ArrayWritable(Writable.class, entry2)}); + + final ArrayWritable map = new ArrayWritable(ArrayWritable.class, new Writable[]{internalMap}); + + final Map expected = new HashMap(); + expected.put(new IntWritable(0), new IntWritable(1)); + expected.put(new IntWritable(2), new IntWritable(3)); + + assertEquals("Wrong size", 2, inspector.getMapSize(map)); + assertEquals("Wrong result of inspection", expected, inspector.getMap(map)); + } + + @Test + public void testHashMap() { + final Map map = new HashMap(); + map.put(new IntWritable(0), new IntWritable(1)); + map.put(new IntWritable(2), new IntWritable(3)); + map.put(new IntWritable(4), new IntWritable(5)); + map.put(new IntWritable(6), new IntWritable(7)); + + assertEquals("Wrong size", 4, inspector.getMapSize(map)); + assertEquals("Wrong result of inspection", map, inspector.getMap(map)); + } +} diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestDeepParquetHiveMapInspector.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestDeepParquetHiveMapInspector.java new file mode 100644 index 0000000..8646ff4 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestDeepParquetHiveMapInspector.java @@ -0,0 +1,90 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.serde; + +import java.util.HashMap; +import java.util.Map; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.ql.io.parquet.serde.primitive.ParquetPrimitiveInspectorFactory; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Writable; +import org.junit.Test; + +public class TestDeepParquetHiveMapInspector extends TestCase { + + private DeepParquetHiveMapInspector inspector; + + @Override + public void setUp() { + inspector = new DeepParquetHiveMapInspector(ParquetPrimitiveInspectorFactory.parquetShortInspector, + PrimitiveObjectInspectorFactory.javaIntObjectInspector); + } + + @Test + public void testNullMap() { + assertNull("Should be null", inspector.getMapValueElement(null, new ShortWritable((short) 0))); + } + + @Test + public void testNullContainer() { + final ArrayWritable map = new ArrayWritable(ArrayWritable.class, null); + assertNull("Should be null", inspector.getMapValueElement(map, new ShortWritable((short) 0))); + } + + @Test + public void testEmptyContainer() { + final ArrayWritable map = new ArrayWritable(ArrayWritable.class, new ArrayWritable[0]); + assertNull("Should be null", inspector.getMapValueElement(map, new ShortWritable((short) 0))); + } + + @Test + public void testRegularMap() { + final Writable[] entry1 = new Writable[]{new IntWritable(0), new IntWritable(1)}; + final Writable[] entry2 = new Writable[]{new IntWritable(2), new IntWritable(3)}; + + final ArrayWritable internalMap = new ArrayWritable(ArrayWritable.class, new Writable[]{ + new ArrayWritable(Writable.class, entry1), new ArrayWritable(Writable.class, entry2)}); + + final ArrayWritable map = new ArrayWritable(ArrayWritable.class, new Writable[]{internalMap}); + + assertEquals("Wrong result of inspection", new IntWritable(1), inspector.getMapValueElement(map, new IntWritable(0))); + assertEquals("Wrong result of inspection", new IntWritable(3), inspector.getMapValueElement(map, new IntWritable(2))); + assertEquals("Wrong result of inspection", new IntWritable(1), inspector.getMapValueElement(map, new ShortWritable((short) 0))); + assertEquals("Wrong result of inspection", new IntWritable(3), inspector.getMapValueElement(map, new ShortWritable((short) 2))); + } + + @Test + public void testHashMap() { + final Map map = new HashMap(); + map.put(new IntWritable(0), new IntWritable(1)); + map.put(new IntWritable(2), 
new IntWritable(3)); + map.put(new IntWritable(4), new IntWritable(5)); + map.put(new IntWritable(6), new IntWritable(7)); + + + assertEquals("Wrong result of inspection", new IntWritable(1), inspector.getMapValueElement(map, new IntWritable(0))); + assertEquals("Wrong result of inspection", new IntWritable(3), inspector.getMapValueElement(map, new IntWritable(2))); + assertEquals("Wrong result of inspection", new IntWritable(5), inspector.getMapValueElement(map, new IntWritable(4))); + assertEquals("Wrong result of inspection", new IntWritable(7), inspector.getMapValueElement(map, new IntWritable(6))); + assertEquals("Wrong result of inspection", new IntWritable(1), inspector.getMapValueElement(map, new ShortWritable((short) 0))); + assertEquals("Wrong result of inspection", new IntWritable(3), inspector.getMapValueElement(map, new ShortWritable((short) 2))); + assertEquals("Wrong result of inspection", new IntWritable(5), inspector.getMapValueElement(map, new ShortWritable((short) 4))); + assertEquals("Wrong result of inspection", new IntWritable(7), inspector.getMapValueElement(map, new ShortWritable((short) 6))); + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetHiveArrayInspector.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetHiveArrayInspector.java new file mode 100644 index 0000000..f3a24af --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetHiveArrayInspector.java @@ -0,0 +1,80 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.parquet.serde; + +import java.util.ArrayList; +import java.util.List; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Writable; +import org.junit.Test; + +public class TestParquetHiveArrayInspector extends TestCase { + + private ParquetHiveArrayInspector inspector; + + @Override + public void setUp() { + inspector = new ParquetHiveArrayInspector(PrimitiveObjectInspectorFactory.javaIntObjectInspector); + } + + @Test + public void testNullArray() { + assertEquals("Wrong size", -1, inspector.getListLength(null)); + assertNull("Should be null", inspector.getList(null)); + assertNull("Should be null", inspector.getListElement(null, 0)); + } + + @Test + public void testNullContainer() { + final ArrayWritable list = new ArrayWritable(ArrayWritable.class, null); + assertEquals("Wrong size", -1, inspector.getListLength(list)); + assertNull("Should be null", inspector.getList(list)); + assertNull("Should be null", inspector.getListElement(list, 0)); + } + + @Test + public void testEmptyContainer() { + final ArrayWritable list = new ArrayWritable(ArrayWritable.class, new ArrayWritable[0]); + assertEquals("Wrong size", -1, inspector.getListLength(list)); + assertNull("Should be null", inspector.getList(list)); + assertNull("Should be null", inspector.getListElement(list, 0)); + } + + @Test + public void testRegularList() { + final ArrayWritable internalList = new ArrayWritable(Writable.class, + new Writable[]{new IntWritable(3), new IntWritable(5), new IntWritable(1)}); + final ArrayWritable list = new ArrayWritable(ArrayWritable.class, new ArrayWritable[]{internalList}); + + final List expected = new ArrayList(); + expected.add(new IntWritable(3)); + expected.add(new IntWritable(5)); + expected.add(new IntWritable(1)); + + assertEquals("Wrong size", 3, inspector.getListLength(list)); + assertEquals("Wrong result of inspection", expected, inspector.getList(list)); + + for (int i = 0; i < expected.size(); ++i) { + assertEquals("Wrong result of inspection", expected.get(i), inspector.getListElement(list, i)); + + } + + assertNull("Should be null", inspector.getListElement(list, 3)); + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestStandardParquetHiveMapInspector.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestStandardParquetHiveMapInspector.java new file mode 100644 index 0000000..278419f --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestStandardParquetHiveMapInspector.java @@ -0,0 +1,88 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.parquet.serde; + +import java.util.HashMap; +import java.util.Map; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Writable; +import org.junit.Test; + +public class TestStandardParquetHiveMapInspector extends TestCase { + + private StandardParquetHiveMapInspector inspector; + + @Override + public void setUp() { + inspector = new StandardParquetHiveMapInspector(PrimitiveObjectInspectorFactory.javaIntObjectInspector, + PrimitiveObjectInspectorFactory.javaIntObjectInspector); + } + + @Test + public void testNullMap() { + assertNull("Should be null", inspector.getMapValueElement(null, new IntWritable(0))); + } + + @Test + public void testNullContainer() { + final ArrayWritable map = new ArrayWritable(ArrayWritable.class, null); + assertNull("Should be null", inspector.getMapValueElement(map, new IntWritable(0))); + } + + @Test + public void testEmptyContainer() { + final ArrayWritable map = new ArrayWritable(ArrayWritable.class, new ArrayWritable[0]); + assertNull("Should be null", inspector.getMapValueElement(map, new IntWritable(0))); + } + + @Test + public void testRegularMap() { + final Writable[] entry1 = new Writable[]{new IntWritable(0), new IntWritable(1)}; + final Writable[] entry2 = new Writable[]{new IntWritable(2), new IntWritable(3)}; + + final ArrayWritable internalMap = new ArrayWritable(ArrayWritable.class, new Writable[]{ + new ArrayWritable(Writable.class, entry1), new ArrayWritable(Writable.class, entry2)}); + + final ArrayWritable map = new ArrayWritable(ArrayWritable.class, new Writable[]{internalMap}); + + assertEquals("Wrong result of inspection", new IntWritable(1), inspector.getMapValueElement(map, new IntWritable(0))); + assertEquals("Wrong result of inspection", new IntWritable(3), inspector.getMapValueElement(map, new IntWritable(2))); + assertNull("Wrong result of inspection", inspector.getMapValueElement(map, new ShortWritable((short) 0))); + assertNull("Wrong result of inspection", inspector.getMapValueElement(map, new ShortWritable((short) 2))); + } + + @Test + public void testHashMap() { + final Map map = new HashMap(); + map.put(new IntWritable(0), new IntWritable(1)); + map.put(new IntWritable(2), new IntWritable(3)); + map.put(new IntWritable(4), new IntWritable(5)); + map.put(new IntWritable(6), new IntWritable(7)); + + assertEquals("Wrong result of inspection", new IntWritable(1), inspector.getMapValueElement(map, new IntWritable(0))); + assertEquals("Wrong result of inspection", new IntWritable(3), inspector.getMapValueElement(map, new IntWritable(2))); + assertEquals("Wrong result of inspection", new IntWritable(5), inspector.getMapValueElement(map, new IntWritable(4))); + assertEquals("Wrong result of inspection", new IntWritable(7), inspector.getMapValueElement(map, new IntWritable(6))); + assertNull("Wrong result of inspection", inspector.getMapValueElement(map, new ShortWritable((short) 0))); + assertNull("Wrong result of inspection", inspector.getMapValueElement(map, new ShortWritable((short) 2))); + assertNull("Wrong result of inspection", inspector.getMapValueElement(map, new ShortWritable((short) 4))); + assertNull("Wrong result of inspection", inspector.getMapValueElement(map, new ShortWritable((short) 6))); + } +} diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java index d0cbed6..d218271 100755 --- a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java @@ -232,6 +232,11 @@ private void validateTable(Table tbl, String tableName) throws MetaException { Warehouse wh = new Warehouse(hiveConf); Table ft = null; try { + // hm.getTable result will not have privileges set (it does not retrieve + // that part from metastore), so unset privileges to null before comparing + // (create table sets it to empty (non null) structures) + tbl.getTTable().setPrivilegesIsSet(false); + ft = hm.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); assertNotNull("Unable to fetch table", ft); ft.checkValidity(); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/authorization/TestSessionUserName.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/authorization/TestSessionUserName.java index 86d5f46..4c5b2bf 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/parse/authorization/TestSessionUserName.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/authorization/TestSessionUserName.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizer; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizerFactory; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizerImpl; @@ -106,8 +107,8 @@ private HiveConf getAuthV2HiveConf() { @Override public HiveAuthorizer createHiveAuthorizer(HiveMetastoreClientFactory metastoreClientFactory, - HiveConf conf, String hiveCurrentUser) { - username = hiveCurrentUser; + HiveConf conf, HiveAuthenticationProvider authenticator) { + username = authenticator.getUserName(); return new HiveAuthorizerImpl(null, null); } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/authorization/plugin/sqlstd/TestOperation2Privilege.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/authorization/plugin/sqlstd/TestOperation2Privilege.java new file mode 100644 index 0000000..31c4443 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/authorization/plugin/sqlstd/TestOperation2Privilege.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.parse.authorization.plugin.sqlstd; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +import java.util.Set; + +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; +import org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.Operation2Privilege; +import org.junit.Test; + +/** + * Test HiveOperationType + */ +public class TestOperation2Privilege { + + /** + * test that all enums in {@link HiveOperationType} match one map entry in + * Operation2Privilege + */ + @Test + public void checkHiveOperationTypeMatch() { + Set operationMapKeys = Operation2Privilege.getOperationTypes(); + for (HiveOperationType operationType : HiveOperationType.values()) { + if (!operationMapKeys.contains(operationType)) { + fail("Unable to find corresponding entry in Operation2Privilege map for HiveOperationType " + + operationType); + } + } + assertEquals("Check if Operation2Privilege, HiveOperationType have same number of instances", + operationMapKeys.size(), HiveOperationType.values().length); + } + +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/processors/TestCommandProcessorFactory.java b/ql/src/test/org/apache/hadoop/hive/ql/processors/TestCommandProcessorFactory.java index 732897f..ac5053a 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/processors/TestCommandProcessorFactory.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/processors/TestCommandProcessorFactory.java @@ -39,25 +39,26 @@ public void setUp() throws Exception { @Test public void testInvalidCommands() throws Exception { Assert.assertNull("Null should have returned null", CommandProcessorFactory.getForHiveCommand(null, conf)); - Assert.assertNull("Blank should have returned null", CommandProcessorFactory.getForHiveCommand(" ", conf)); - Assert.assertNull("SQL should have returned null", CommandProcessorFactory.getForHiveCommand("SELECT * FROM TABLE", conf)); + Assert.assertNull("Blank should have returned null", CommandProcessorFactory.getForHiveCommand(new String[]{" "}, conf)); + Assert.assertNull("set role should have returned null", CommandProcessorFactory.getForHiveCommand(new String[]{"set role"}, conf)); + Assert.assertNull("SQL should have returned null", CommandProcessorFactory.getForHiveCommand(new String[]{"SELECT * FROM TABLE"}, conf)); } @Test public void testAvailableCommands() throws Exception { SessionState.start(conf); for (HiveCommand command : HiveCommand.values()) { String cmd = command.name(); - Assert.assertNotNull("Cmd " + cmd + " not return null", CommandProcessorFactory.getForHiveCommand(cmd, conf)); + Assert.assertNotNull("Cmd " + cmd + " not return null", CommandProcessorFactory.getForHiveCommand(new String[]{cmd}, conf)); } for (HiveCommand command : HiveCommand.values()) { String cmd = command.name().toLowerCase(); - Assert.assertNotNull("Cmd " + cmd + " not return null", CommandProcessorFactory.getForHiveCommand(cmd, conf)); + Assert.assertNotNull("Cmd " + cmd + " not return null", CommandProcessorFactory.getForHiveCommand(new String[]{cmd}, conf)); } conf.set(HiveConf.ConfVars.HIVE_SECURITY_COMMAND_WHITELIST.toString(), ""); for (HiveCommand command : HiveCommand.values()) { String cmd = command.name(); try { - CommandProcessorFactory.getForHiveCommand(cmd, conf); + CommandProcessorFactory.getForHiveCommand(new String[]{cmd}, conf); Assert.fail("Expected SQLException for " + cmd + " as available commands is empty"); } catch (SQLException e) { Assert.assertEquals("Insufficient privileges to 
execute " + cmd, e.getMessage()); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFMath.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFMath.java index dde978e..8cf0452 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFMath.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFMath.java @@ -136,6 +136,14 @@ public void testRadians() throws HiveException { } @Test + public void testDegrees() throws HiveException { + UDFDegrees udf = new UDFDegrees(); + input = createDecimal("0.7853981633974483"); + DoubleWritable res = udf.evaluate(input); + Assert.assertEquals(45.0, res.get(), 0.000001); + } + + @Test public void testSqrt() throws HiveException { UDFSqrt udf = new UDFSqrt(); input = createDecimal("49.0"); diff --git a/ql/src/test/queries/clientnegative/authorization_grant_table_allpriv.q b/ql/src/test/queries/clientnegative/authorization_grant_table_allpriv.q new file mode 100644 index 0000000..17d2b46 --- /dev/null +++ b/ql/src/test/queries/clientnegative/authorization_grant_table_allpriv.q @@ -0,0 +1,14 @@ +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory; +set hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.SessionStateConfigUserAuthenticator; + +set user.name=user1; +-- current user has been set (comment line before the set cmd is resulting in parse error!!) + +CREATE TABLE table_priv_allf(i int); + +-- grant insert to user2 WITH grant option +GRANT INSERT ON table_priv_allf TO USER user2 with grant option; + +set user.name=user2; +-- try grant all to user3, without having all privileges +GRANT ALL ON table_priv_allf TO USER user3; diff --git a/ql/src/test/queries/clientnegative/authorization_grant_table_fail1.q b/ql/src/test/queries/clientnegative/authorization_grant_table_fail1.q new file mode 100644 index 0000000..140f5b0 --- /dev/null +++ b/ql/src/test/queries/clientnegative/authorization_grant_table_fail1.q @@ -0,0 +1,11 @@ +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory; +set hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.SessionStateConfigUserAuthenticator; + +set user.name=user1; +-- current user has been set (comment line before the set cmd is resulting in parse error!!) + +CREATE TABLE table_priv_gfail1(i int); + +set user.name=user2; +-- try grant insert to user3 as user2 +GRANT INSERT ON table_priv_gfail1 TO USER user3; diff --git a/ql/src/test/queries/clientnegative/authorization_grant_table_fail_nogrant.q b/ql/src/test/queries/clientnegative/authorization_grant_table_fail_nogrant.q new file mode 100644 index 0000000..8d20919 --- /dev/null +++ b/ql/src/test/queries/clientnegative/authorization_grant_table_fail_nogrant.q @@ -0,0 +1,14 @@ +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory; +set hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.SessionStateConfigUserAuthenticator; + +set user.name=user1; +-- current user has been set (comment line before the set cmd is resulting in parse error!!) 
+ +CREATE TABLE table_priv_gfail1(i int); + +-- grant insert to user2 WITHOUT grant option +GRANT INSERT ON table_priv_gfail1 TO USER user2; + +set user.name=user2; +-- try grant insert to user3 +GRANT INSERT ON table_priv_gfail1 TO USER user3; diff --git a/ql/src/test/queries/clientnegative/authorization_revoke_table_fail1.q b/ql/src/test/queries/clientnegative/authorization_revoke_table_fail1.q new file mode 100644 index 0000000..f2e3eab --- /dev/null +++ b/ql/src/test/queries/clientnegative/authorization_revoke_table_fail1.q @@ -0,0 +1,14 @@ +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory; +set hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.SessionStateConfigUserAuthenticator; + +set user.name=user1; +-- current user has been set (comment line before the set cmd is resulting in parse error!!) + +CREATE TABLE table_priv_rfail1(i int); + +-- grant insert to user2 +GRANT INSERT ON table_priv_rfail1 TO USER user2; + +set user.name=user3; +-- try dropping the privilege as user3 +REVOKE INSERT ON TABLE table_priv_rfail1 FROM USER user2; diff --git a/ql/src/test/queries/clientnegative/authorization_revoke_table_fail2.q b/ql/src/test/queries/clientnegative/authorization_revoke_table_fail2.q new file mode 100644 index 0000000..edb5b65 --- /dev/null +++ b/ql/src/test/queries/clientnegative/authorization_revoke_table_fail2.q @@ -0,0 +1,18 @@ +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory; +set hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.SessionStateConfigUserAuthenticator; + +set user.name=user1; +-- current user has been set (comment line before the set cmd is resulting in parse error!!) 
+ +CREATE TABLE table_priv_rfai2(i int); + +-- grant insert to user2 +GRANT INSERT ON table_priv_rfai2 TO USER user2; +GRANT SELECT ON table_priv_rfai2 TO USER user3 WITH GRANT OPTION; + +set user.name=user3; +-- grant select as user3 to user 2 +GRANT SELECT ON table_priv_rfai2 TO USER user2; + +-- try dropping the privilege as user3 +REVOKE INSERT ON TABLE table_priv_rfai2 FROM USER user2; diff --git a/ql/src/test/queries/clientpositive/authorization_1_sql_std.q b/ql/src/test/queries/clientpositive/authorization_1_sql_std.q index 65cfeb3..44d73fc 100644 --- a/ql/src/test/queries/clientpositive/authorization_1_sql_std.q +++ b/ql/src/test/queries/clientpositive/authorization_1_sql_std.q @@ -6,18 +6,18 @@ set hive.security.authorization.enabled=true; --table grant to user -grant select on table src_autho_test to user hive_test_user; +grant select on table src_autho_test to user user_sauth; -show grant user hive_test_user on table src_autho_test; +show grant user user_sauth on table src_autho_test; -revoke select on table src_autho_test from user hive_test_user; -show grant user hive_test_user on table src_autho_test; +revoke select on table src_autho_test from user user_sauth; +show grant user user_sauth on table src_autho_test; --role create role src_role; -grant role src_role to user hive_test_user; -show role grant user hive_test_user; +grant role src_role to user user_sauth; +show role grant user user_sauth; --table grant to role diff --git a/ql/src/test/queries/clientpositive/authorization_create_table_owner_privs.q b/ql/src/test/queries/clientpositive/authorization_create_table_owner_privs.q new file mode 100644 index 0000000..4dde2b0 --- /dev/null +++ b/ql/src/test/queries/clientpositive/authorization_create_table_owner_privs.q @@ -0,0 +1,10 @@ +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory; +set hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.SessionStateConfigUserAuthenticator; + +set user.name=user1; + +create table create_table_creator_priv_test(i int); + +-- all privileges should have been set for user + +show grant user user1 on table create_table_creator_priv_test; diff --git a/ql/src/test/queries/clientpositive/authorization_grant_table_priv.q b/ql/src/test/queries/clientpositive/authorization_grant_table_priv.q new file mode 100644 index 0000000..c18f5b4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/authorization_grant_table_priv.q @@ -0,0 +1,43 @@ +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory; +set hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.SessionStateConfigUserAuthenticator; + +set user.name=user1; +-- current user has been set (comment line before the set cmd is resulting in parse error!!) 
+ +CREATE TABLE table_priv1(i int); + +-- all privileges should have been set for user + +-- grant insert privilege to another user +GRANT INSERT ON table_priv1 TO USER user2; +SHOW GRANT USER user2 ON TABLE table_priv1; + +-- grant select privilege to another user with grant +GRANT SELECT ON table_priv1 TO USER user2 with grant option; +SHOW GRANT USER user2 ON TABLE table_priv1; + +set user.name=user2; +-- change to other user - user2 +-- grant permissions to another user as user2 +GRANT SELECT ON table_priv1 TO USER user3 with grant option; +SHOW GRANT USER user3 ON TABLE table_priv1; + +set user.name=user3; +-- change to other user - user3 +-- grant permissions to another user as user3 +GRANT SELECT ON table_priv1 TO USER user4 with grant option; +SHOW GRANT USER user4 ON TABLE table_priv1; + +set user.name=user1; +-- switched back to table owner + +-- grant all with grant to user22 +GRANT ALL ON table_priv1 TO USER user22 with grant option; +SHOW GRANT USER user22 ON TABLE table_priv1; + +set user.name=user22; + +-- grant all without grant to user33 +GRANT ALL ON table_priv1 TO USER user33 with grant option; +SHOW GRANT USER user33 ON TABLE table_priv1; + diff --git a/ql/src/test/queries/clientpositive/authorization_revoke_table_priv.q b/ql/src/test/queries/clientpositive/authorization_revoke_table_priv.q new file mode 100644 index 0000000..bf5db29 --- /dev/null +++ b/ql/src/test/queries/clientpositive/authorization_revoke_table_priv.q @@ -0,0 +1,50 @@ +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory; +set hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.SessionStateConfigUserAuthenticator; + +set user.name=user1; +-- current user has been set (comment line before the set cmd is resulting in parse error!!) 
+ +CREATE TABLE table_priv_rev(i int); + +-- grant insert privilege to user2 +GRANT INSERT ON table_priv_rev TO USER user2; +SHOW GRANT USER user2 ON TABLE table_priv_rev; + +-- revoke insert privilege from user2 +REVOKE INSERT ON TABLE table_priv_rev FROM USER user2; +SHOW GRANT USER user2 ON TABLE table_priv_rev; + +-- grant all privileges one at a time -- +-- grant insert privilege to user2 +GRANT INSERT ON table_priv_rev TO USER user2; +SHOW GRANT USER user2 ON TABLE table_priv_rev; + +-- grant select privilege to user2, with grant option +GRANT SELECT ON table_priv_rev TO USER user2 WITH GRANT OPTION; +SHOW GRANT USER user2 ON TABLE table_priv_rev; + +-- grant update privilege to user2 +GRANT UPDATE ON table_priv_rev TO USER user2; +SHOW GRANT USER user2 ON TABLE table_priv_rev; + +-- grant delete privilege to user2 +GRANT DELETE ON table_priv_rev TO USER user2; +SHOW GRANT USER user2 ON TABLE table_priv_rev; + +-- start revoking -- +-- revoke update privilege from user2 +REVOKE UPDATE ON TABLE table_priv_rev FROM USER user2; +SHOW GRANT USER user2 ON TABLE table_priv_rev; + +-- revoke DELETE privilege from user2 +REVOKE DELETE ON TABLE table_priv_rev FROM USER user2; +SHOW GRANT USER user2 ON TABLE table_priv_rev; + +-- revoke insert privilege from user2 +REVOKE INSERT ON TABLE table_priv_rev FROM USER user2; +SHOW GRANT USER user2 ON TABLE table_priv_rev; + +-- revoke select privilege from user2 +REVOKE SELECT ON TABLE table_priv_rev FROM USER user2; +SHOW GRANT USER user2 ON TABLE table_priv_rev; + diff --git a/ql/src/test/queries/clientpositive/authorization_set_show_current_role.q b/ql/src/test/queries/clientpositive/authorization_set_show_current_role.q new file mode 100644 index 0000000..1ff8192 --- /dev/null +++ b/ql/src/test/queries/clientpositive/authorization_set_show_current_role.q @@ -0,0 +1,17 @@ +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory; + +show current roles; + +create role r1; +grant role r1 to user hive_test_user; +set role r1; +show current roles; + +set role PUBLIC; +show current roles; + +set role NONE; +show current roles; + +drop role r1; + diff --git a/ql/src/test/queries/clientpositive/parquet_create.q b/ql/src/test/queries/clientpositive/parquet_create.q new file mode 100644 index 0000000..0b976bd --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_create.q @@ -0,0 +1,36 @@ +DROP TABLE parquet_create_staging; +DROP TABLE parquet_create; + +CREATE TABLE parquet_create_staging ( + id int, + str string, + mp MAP, + lst ARRAY, + strct STRUCT +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':'; + +CREATE TABLE parquet_create ( + id int, + str string, + mp MAP, + lst ARRAY, + strct STRUCT +) STORED AS PARQUET; + +DESCRIBE FORMATTED parquet_create; + +LOAD DATA LOCAL INPATH '../../data/files/parquet_create.txt' OVERWRITE INTO TABLE parquet_create_staging; + +SELECT * FROM parquet_create_staging; + +INSERT OVERWRITE TABLE parquet_create SELECT * FROM parquet_create_staging; + +SELECT * FROM parquet_create group by id; +SELECT id, count(0) FROM parquet_create group by id; +SELECT str from parquet_create; +SELECT mp from parquet_create; +SELECT lst from parquet_create; +SELECT strct from parquet_create; diff --git a/ql/src/test/queries/clientpositive/parquet_partitioned.q b/ql/src/test/queries/clientpositive/parquet_partitioned.q new file mode 100644 index 0000000..103d26f --- /dev/null +++ 
b/ql/src/test/queries/clientpositive/parquet_partitioned.q @@ -0,0 +1,34 @@ +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.exec.dynamic.partition=true; + +DROP TABLE parquet_partitioned_staging; +DROP TABLE parquet_partitioned; + +CREATE TABLE parquet_partitioned_staging ( + id int, + str string, + part string +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|'; + +CREATE TABLE parquet_partitioned ( + id int, + str string +) PARTITIONED BY (part string) +STORED AS PARQUET; + +DESCRIBE FORMATTED parquet_partitioned; + +LOAD DATA LOCAL INPATH '../../data/files/parquet_partitioned.txt' OVERWRITE INTO TABLE parquet_partitioned_staging; + +SELECT * FROM parquet_partitioned_staging; + +INSERT OVERWRITE TABLE parquet_partitioned PARTITION (part) SELECT * FROM parquet_partitioned_staging; + +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SELECT * FROM parquet_partitioned; +SELECT part, COUNT(0) FROM parquet_partitioned GROUP BY part; + +set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +SELECT * FROM parquet_partitioned; +SELECT part, COUNT(0) FROM parquet_partitioned GROUP BY part; diff --git a/ql/src/test/results/clientnegative/authorization_grant_table_allpriv.q.out b/ql/src/test/results/clientnegative/authorization_grant_table_allpriv.q.out new file mode 100644 index 0000000..db0c36f --- /dev/null +++ b/ql/src/test/results/clientnegative/authorization_grant_table_allpriv.q.out @@ -0,0 +1,22 @@ +PREHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) + +CREATE TABLE table_priv_allf(i int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) + +CREATE TABLE table_priv_allf(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@table_priv_allf +PREHOOK: query: -- grant insert to user2 WITH grant option +GRANT INSERT ON table_priv_allf TO USER user2 with grant option +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@table_priv_allf +POSTHOOK: query: -- grant insert to user2 WITH grant option +GRANT INSERT ON table_priv_allf TO USER user2 with grant option +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@table_priv_allf +PREHOOK: query: -- try grant all to user3, without having all privileges +GRANT ALL ON table_priv_allf TO USER user3 +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@table_priv_allf +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Permission denied. Principal [name=user2, type=USER] does not have following privileges on Hive Object [type=TABLE, dbname=default, table/viewname=table_priv_allf] : [SELECT with grant, UPDATE with grant, DELETE with grant] diff --git a/ql/src/test/results/clientnegative/authorization_grant_table_fail1.q.out b/ql/src/test/results/clientnegative/authorization_grant_table_fail1.q.out new file mode 100644 index 0000000..156c555 --- /dev/null +++ b/ql/src/test/results/clientnegative/authorization_grant_table_fail1.q.out @@ -0,0 +1,14 @@ +PREHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) + +CREATE TABLE table_priv_gfail1(i int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) 
+ +CREATE TABLE table_priv_gfail1(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@table_priv_gfail1 +PREHOOK: query: -- try grant insert to user3 as user2 +GRANT INSERT ON table_priv_gfail1 TO USER user3 +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@table_priv_gfail1 +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Permission denied. Principal [name=user2, type=USER] does not have following privileges on Hive Object [type=TABLE, dbname=default, table/viewname=table_priv_gfail1] : [INSERT with grant] diff --git a/ql/src/test/results/clientnegative/authorization_grant_table_fail_nogrant.q.out b/ql/src/test/results/clientnegative/authorization_grant_table_fail_nogrant.q.out new file mode 100644 index 0000000..e584e06 --- /dev/null +++ b/ql/src/test/results/clientnegative/authorization_grant_table_fail_nogrant.q.out @@ -0,0 +1,22 @@ +PREHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) + +CREATE TABLE table_priv_gfail1(i int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) + +CREATE TABLE table_priv_gfail1(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@table_priv_gfail1 +PREHOOK: query: -- grant insert to user2 WITHOUT grant option +GRANT INSERT ON table_priv_gfail1 TO USER user2 +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@table_priv_gfail1 +POSTHOOK: query: -- grant insert to user2 WITHOUT grant option +GRANT INSERT ON table_priv_gfail1 TO USER user2 +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@table_priv_gfail1 +PREHOOK: query: -- try grant insert to user3 +GRANT INSERT ON table_priv_gfail1 TO USER user3 +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@table_priv_gfail1 +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Permission denied. Principal [name=user2, type=USER] does not have following privileges on Hive Object [type=TABLE, dbname=default, table/viewname=table_priv_gfail1] : [INSERT with grant] diff --git a/ql/src/test/results/clientnegative/authorization_invalid_priv_v1.q.out b/ql/src/test/results/clientnegative/authorization_invalid_priv_v1.q.out index 425745e..10d1ca8 100644 --- a/ql/src/test/results/clientnegative/authorization_invalid_priv_v1.q.out +++ b/ql/src/test/results/clientnegative/authorization_invalid_priv_v1.q.out @@ -3,4 +3,4 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: create table if not exists authorization_invalid_v1 (key int, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@authorization_invalid_v1 -FAILED: SemanticException undefined privilege TOK_PRIV_DELETE +FAILED: SemanticException Undefined privilege Delete diff --git a/ql/src/test/results/clientnegative/authorization_invalid_priv_v2.q.out b/ql/src/test/results/clientnegative/authorization_invalid_priv_v2.q.out index 6adc333..62aa8da 100644 --- a/ql/src/test/results/clientnegative/authorization_invalid_priv_v2.q.out +++ b/ql/src/test/results/clientnegative/authorization_invalid_priv_v2.q.out @@ -6,4 +6,4 @@ POSTHOOK: Output: default@authorization_invalid_v2 PREHOOK: query: grant index on table authorization_invalid_v2 to user hive_test_user PREHOOK: type: GRANT_PRIVILEGE PREHOOK: Output: default@authorization_invalid_v2 -FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. 
Privilege: Index is not supported in sql standard authorization mode +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Unsupported privilege type INDEX diff --git a/ql/src/test/results/clientnegative/authorization_revoke_table_fail1.q.out b/ql/src/test/results/clientnegative/authorization_revoke_table_fail1.q.out new file mode 100644 index 0000000..696f29b --- /dev/null +++ b/ql/src/test/results/clientnegative/authorization_revoke_table_fail1.q.out @@ -0,0 +1,23 @@ +PREHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) + +CREATE TABLE table_priv_rfail1(i int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) + +CREATE TABLE table_priv_rfail1(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@table_priv_rfail1 +PREHOOK: query: -- grant insert to user2 +GRANT INSERT ON table_priv_rfail1 TO USER user2 +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@table_priv_rfail1 +POSTHOOK: query: -- grant insert to user2 +GRANT INSERT ON table_priv_rfail1 TO USER user2 +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@table_priv_rfail1 +PREHOOK: query: -- try dropping the privilege as user3 +REVOKE INSERT ON TABLE table_priv_rfail1 FROM USER user2 +PREHOOK: type: REVOKE_PRIVILEGE +PREHOOK: Output: default@table_priv_rfail1 +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Cannot find privilege Privilege [name=INSERT, columns=null] for Principal [name=user2, type=USER] on Hive Object [type=TABLE, dbname=default, table/viewname=table_priv_rfail1] granted by user3 + diff --git a/ql/src/test/results/clientnegative/authorization_revoke_table_fail2.q.out b/ql/src/test/results/clientnegative/authorization_revoke_table_fail2.q.out new file mode 100644 index 0000000..5fbfd17 --- /dev/null +++ b/ql/src/test/results/clientnegative/authorization_revoke_table_fail2.q.out @@ -0,0 +1,37 @@ +PREHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) + +CREATE TABLE table_priv_rfai2(i int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) 
+ +CREATE TABLE table_priv_rfai2(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@table_priv_rfai2 +PREHOOK: query: -- grant insert to user2 +GRANT INSERT ON table_priv_rfai2 TO USER user2 +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@table_priv_rfai2 +POSTHOOK: query: -- grant insert to user2 +GRANT INSERT ON table_priv_rfai2 TO USER user2 +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@table_priv_rfai2 +PREHOOK: query: GRANT SELECT ON table_priv_rfai2 TO USER user3 WITH GRANT OPTION +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@table_priv_rfai2 +POSTHOOK: query: GRANT SELECT ON table_priv_rfai2 TO USER user3 WITH GRANT OPTION +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@table_priv_rfai2 +PREHOOK: query: -- grant select as user3 to user 2 +GRANT SELECT ON table_priv_rfai2 TO USER user2 +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@table_priv_rfai2 +POSTHOOK: query: -- grant select as user3 to user 2 +GRANT SELECT ON table_priv_rfai2 TO USER user2 +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@table_priv_rfai2 +PREHOOK: query: -- try dropping the privilege as user3 +REVOKE INSERT ON TABLE table_priv_rfai2 FROM USER user2 +PREHOOK: type: REVOKE_PRIVILEGE +PREHOOK: Output: default@table_priv_rfai2 +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Cannot find privilege Privilege [name=INSERT, columns=null] for Principal [name=user2, type=USER] on Hive Object [type=TABLE, dbname=default, table/viewname=table_priv_rfai2] granted by user3 + diff --git a/ql/src/test/results/clientnegative/authorization_role_cycles1.q.out b/ql/src/test/results/clientnegative/authorization_role_cycles1.q.out index 9d2c3be..da9014d 100644 --- a/ql/src/test/results/clientnegative/authorization_role_cycles1.q.out +++ b/ql/src/test/results/clientnegative/authorization_role_cycles1.q.out @@ -15,4 +15,4 @@ POSTHOOK: type: GRANT_ROLE PREHOOK: query: -- this will create a cycle grant role role2 to role role1 PREHOOK: type: GRANT_ROLE -FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizationPluginException: Cannot grant role role1 to role2 as role2 already belongs to the role role1. (no cycles allowed) +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException: Cannot grant role role1 to role2 as role2 already belongs to the role role1. (no cycles allowed) diff --git a/ql/src/test/results/clientnegative/authorization_role_cycles2.q.out b/ql/src/test/results/clientnegative/authorization_role_cycles2.q.out index be9a491..f54b88e 100644 --- a/ql/src/test/results/clientnegative/authorization_role_cycles2.q.out +++ b/ql/src/test/results/clientnegative/authorization_role_cycles2.q.out @@ -41,4 +41,4 @@ POSTHOOK: type: GRANT_ROLE PREHOOK: query: -- this will create a cycle in middle of the hierarchy grant role role2 to role role4 PREHOOK: type: GRANT_ROLE -FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizationPluginException: Cannot grant role role4 to role2 as role2 already belongs to the role role4. (no cycles allowed) +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. 
org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException: Cannot grant role role4 to role2 as role2 already belongs to the role role4. (no cycles allowed) diff --git a/ql/src/test/results/clientpositive/authorization_1_sql_std.q.out b/ql/src/test/results/clientpositive/authorization_1_sql_std.q.out index 0874b92..8184708 100644 --- a/ql/src/test/results/clientpositive/authorization_1_sql_std.q.out +++ b/ql/src/test/results/clientpositive/authorization_1_sql_std.q.out @@ -5,28 +5,28 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@src_autho_test PREHOOK: query: --table grant to user -grant select on table src_autho_test to user hive_test_user +grant select on table src_autho_test to user user_sauth PREHOOK: type: GRANT_PRIVILEGE PREHOOK: Output: default@src_autho_test POSTHOOK: query: --table grant to user -grant select on table src_autho_test to user hive_test_user +grant select on table src_autho_test to user user_sauth POSTHOOK: type: GRANT_PRIVILEGE POSTHOOK: Output: default@src_autho_test -PREHOOK: query: show grant user hive_test_user on table src_autho_test +PREHOOK: query: show grant user user_sauth on table src_autho_test PREHOOK: type: SHOW_GRANT -POSTHOOK: query: show grant user hive_test_user on table src_autho_test +POSTHOOK: query: show grant user user_sauth on table src_autho_test POSTHOOK: type: SHOW_GRANT -default src_autho_test hive_test_user USER Select false -1 hive_test_user -PREHOOK: query: revoke select on table src_autho_test from user hive_test_user +default src_autho_test user_sauth USER SELECT false -1 hive_test_user +PREHOOK: query: revoke select on table src_autho_test from user user_sauth PREHOOK: type: REVOKE_PRIVILEGE PREHOOK: Output: default@src_autho_test -POSTHOOK: query: revoke select on table src_autho_test from user hive_test_user +POSTHOOK: query: revoke select on table src_autho_test from user user_sauth POSTHOOK: type: REVOKE_PRIVILEGE POSTHOOK: Output: default@src_autho_test -PREHOOK: query: show grant user hive_test_user on table src_autho_test +PREHOOK: query: show grant user user_sauth on table src_autho_test PREHOOK: type: SHOW_GRANT -POSTHOOK: query: show grant user hive_test_user on table src_autho_test +POSTHOOK: query: show grant user user_sauth on table src_autho_test POSTHOOK: type: SHOW_GRANT PREHOOK: query: --role create role src_role @@ -34,15 +34,15 @@ PREHOOK: type: CREATEROLE POSTHOOK: query: --role create role src_role POSTHOOK: type: CREATEROLE -PREHOOK: query: grant role src_role to user hive_test_user +PREHOOK: query: grant role src_role to user user_sauth PREHOOK: type: GRANT_ROLE -POSTHOOK: query: grant role src_role to user hive_test_user +POSTHOOK: query: grant role src_role to user user_sauth POSTHOOK: type: GRANT_ROLE -PREHOOK: query: show role grant user hive_test_user +PREHOOK: query: show role grant user user_sauth PREHOOK: type: SHOW_ROLE_GRANT -POSTHOOK: query: show role grant user hive_test_user +POSTHOOK: query: show role grant user user_sauth POSTHOOK: type: SHOW_ROLE_GRANT -src_role -1 hive_test_user USER false -1 hive_test_user +src_role -1 user_sauth USER false -1 hive_test_user PUBLIC -1 false -1 PREHOOK: query: --table grant to role @@ -58,7 +58,7 @@ PREHOOK: query: show grant role src_role on table src_autho_test PREHOOK: type: SHOW_GRANT POSTHOOK: query: show grant role src_role on table src_autho_test POSTHOOK: type: SHOW_GRANT -default src_autho_test src_role ROLE Select false -1 hive_test_user +default src_autho_test src_role ROLE SELECT false -1 hive_test_user PREHOOK: 
query: revoke select on table src_autho_test from role src_role PREHOOK: type: REVOKE_PRIVILEGE PREHOOK: Output: default@src_autho_test diff --git a/ql/src/test/results/clientpositive/authorization_create_table_owner_privs.q.out b/ql/src/test/results/clientpositive/authorization_create_table_owner_privs.q.out new file mode 100644 index 0000000..b1bce1c --- /dev/null +++ b/ql/src/test/results/clientpositive/authorization_create_table_owner_privs.q.out @@ -0,0 +1,17 @@ +PREHOOK: query: create table create_table_creator_priv_test(i int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table create_table_creator_priv_test(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@create_table_creator_priv_test +PREHOOK: query: -- all privileges should have been set for user + +show grant user user1 on table create_table_creator_priv_test +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: -- all privileges should have been set for user + +show grant user user1 on table create_table_creator_priv_test +POSTHOOK: type: SHOW_GRANT +default create_table_creator_priv_test user1 USER DELETE true -1 user1 +default create_table_creator_priv_test user1 USER INSERT true -1 user1 +default create_table_creator_priv_test user1 USER SELECT true -1 user1 +default create_table_creator_priv_test user1 USER UPDATE true -1 user1 diff --git a/ql/src/test/results/clientpositive/authorization_grant_table_priv.q.out b/ql/src/test/results/clientpositive/authorization_grant_table_priv.q.out new file mode 100644 index 0000000..1e5c031 --- /dev/null +++ b/ql/src/test/results/clientpositive/authorization_grant_table_priv.q.out @@ -0,0 +1,106 @@ +PREHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) + +CREATE TABLE table_priv1(i int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) 
+ +CREATE TABLE table_priv1(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@table_priv1 +PREHOOK: query: -- all privileges should have been set for user + +-- grant insert privilege to another user +GRANT INSERT ON table_priv1 TO USER user2 +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@table_priv1 +POSTHOOK: query: -- all privileges should have been set for user + +-- grant insert privilege to another user +GRANT INSERT ON table_priv1 TO USER user2 +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@table_priv1 +PREHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv1 +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv1 +POSTHOOK: type: SHOW_GRANT +default table_priv1 user2 USER INSERT false -1 user1 +PREHOOK: query: -- grant select privilege to another user with grant +GRANT SELECT ON table_priv1 TO USER user2 with grant option +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@table_priv1 +POSTHOOK: query: -- grant select privilege to another user with grant +GRANT SELECT ON table_priv1 TO USER user2 with grant option +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@table_priv1 +PREHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv1 +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv1 +POSTHOOK: type: SHOW_GRANT +default table_priv1 user2 USER INSERT false -1 user1 +default table_priv1 user2 USER SELECT true -1 user1 +PREHOOK: query: -- change to other user - user2 +-- grant permissions to another user as user2 +GRANT SELECT ON table_priv1 TO USER user3 with grant option +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@table_priv1 +POSTHOOK: query: -- change to other user - user2 +-- grant permissions to another user as user2 +GRANT SELECT ON table_priv1 TO USER user3 with grant option +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@table_priv1 +PREHOOK: query: SHOW GRANT USER user3 ON TABLE table_priv1 +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: SHOW GRANT USER user3 ON TABLE table_priv1 +POSTHOOK: type: SHOW_GRANT +default table_priv1 user3 USER SELECT true -1 user2 +PREHOOK: query: -- change to other user - user3 +-- grant permissions to another user as user3 +GRANT SELECT ON table_priv1 TO USER user4 with grant option +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@table_priv1 +POSTHOOK: query: -- change to other user - user3 +-- grant permissions to another user as user3 +GRANT SELECT ON table_priv1 TO USER user4 with grant option +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@table_priv1 +PREHOOK: query: SHOW GRANT USER user4 ON TABLE table_priv1 +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: SHOW GRANT USER user4 ON TABLE table_priv1 +POSTHOOK: type: SHOW_GRANT +default table_priv1 user4 USER SELECT true -1 user3 +#### A masked pattern was here #### + +-- grant all with grant to user22 +GRANT ALL ON table_priv1 TO USER user22 with grant option +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@table_priv1 +#### A masked pattern was here #### + +-- grant all with grant to user22 +GRANT ALL ON table_priv1 TO USER user22 with grant option +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@table_priv1 +PREHOOK: query: SHOW GRANT USER user22 ON TABLE table_priv1 +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: SHOW GRANT USER user22 ON TABLE table_priv1 +POSTHOOK: type: SHOW_GRANT +default table_priv1 user22 USER DELETE true -1 user1 +default table_priv1 user22 USER INSERT true -1 user1 +default 
table_priv1 user22 USER SELECT true -1 user1 +default table_priv1 user22 USER UPDATE true -1 user1 +PREHOOK: query: -- grant all without grant to user33 +GRANT ALL ON table_priv1 TO USER user33 with grant option +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@table_priv1 +POSTHOOK: query: -- grant all without grant to user33 +GRANT ALL ON table_priv1 TO USER user33 with grant option +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@table_priv1 +PREHOOK: query: SHOW GRANT USER user33 ON TABLE table_priv1 +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: SHOW GRANT USER user33 ON TABLE table_priv1 +POSTHOOK: type: SHOW_GRANT +default table_priv1 user33 USER DELETE true -1 user22 +default table_priv1 user33 USER INSERT true -1 user22 +default table_priv1 user33 USER SELECT true -1 user22 +default table_priv1 user33 USER UPDATE true -1 user22 diff --git a/ql/src/test/results/clientpositive/authorization_revoke_table_priv.q.out b/ql/src/test/results/clientpositive/authorization_revoke_table_priv.q.out new file mode 100644 index 0000000..7ea601d --- /dev/null +++ b/ql/src/test/results/clientpositive/authorization_revoke_table_priv.q.out @@ -0,0 +1,150 @@ +PREHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) + +CREATE TABLE table_priv_rev(i int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) + +CREATE TABLE table_priv_rev(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@table_priv_rev +PREHOOK: query: -- grant insert privilege to user2 +GRANT INSERT ON table_priv_rev TO USER user2 +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@table_priv_rev +POSTHOOK: query: -- grant insert privilege to user2 +GRANT INSERT ON table_priv_rev TO USER user2 +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@table_priv_rev +PREHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +POSTHOOK: type: SHOW_GRANT +default table_priv_rev user2 USER INSERT false -1 user1 +PREHOOK: query: -- revoke insert privilege from user2 +REVOKE INSERT ON TABLE table_priv_rev FROM USER user2 +PREHOOK: type: REVOKE_PRIVILEGE +PREHOOK: Output: default@table_priv_rev +POSTHOOK: query: -- revoke insert privilege from user2 +REVOKE INSERT ON TABLE table_priv_rev FROM USER user2 +POSTHOOK: type: REVOKE_PRIVILEGE +POSTHOOK: Output: default@table_priv_rev +PREHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +POSTHOOK: type: SHOW_GRANT +PREHOOK: query: -- grant all privileges one at a time -- +-- grant insert privilege to user2 +GRANT INSERT ON table_priv_rev TO USER user2 +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@table_priv_rev +POSTHOOK: query: -- grant all privileges one at a time -- +-- grant insert privilege to user2 +GRANT INSERT ON table_priv_rev TO USER user2 +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@table_priv_rev +PREHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +POSTHOOK: type: SHOW_GRANT +default table_priv_rev user2 USER INSERT false -1 user1 +PREHOOK: query: -- grant select privilege to user2, with grant option +GRANT SELECT ON table_priv_rev TO USER user2 WITH GRANT OPTION +PREHOOK: type: GRANT_PRIVILEGE 
+PREHOOK: Output: default@table_priv_rev +POSTHOOK: query: -- grant select privilege to user2, with grant option +GRANT SELECT ON table_priv_rev TO USER user2 WITH GRANT OPTION +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@table_priv_rev +PREHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +POSTHOOK: type: SHOW_GRANT +default table_priv_rev user2 USER INSERT false -1 user1 +default table_priv_rev user2 USER SELECT true -1 user1 +PREHOOK: query: -- grant update privilege to user2 +GRANT UPDATE ON table_priv_rev TO USER user2 +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@table_priv_rev +POSTHOOK: query: -- grant update privilege to user2 +GRANT UPDATE ON table_priv_rev TO USER user2 +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@table_priv_rev +PREHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +POSTHOOK: type: SHOW_GRANT +default table_priv_rev user2 USER INSERT false -1 user1 +default table_priv_rev user2 USER UPDATE false -1 user1 +default table_priv_rev user2 USER SELECT true -1 user1 +PREHOOK: query: -- grant delete privilege to user2 +GRANT DELETE ON table_priv_rev TO USER user2 +PREHOOK: type: GRANT_PRIVILEGE +PREHOOK: Output: default@table_priv_rev +POSTHOOK: query: -- grant delete privilege to user2 +GRANT DELETE ON table_priv_rev TO USER user2 +POSTHOOK: type: GRANT_PRIVILEGE +POSTHOOK: Output: default@table_priv_rev +PREHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +POSTHOOK: type: SHOW_GRANT +default table_priv_rev user2 USER DELETE false -1 user1 +default table_priv_rev user2 USER INSERT false -1 user1 +default table_priv_rev user2 USER UPDATE false -1 user1 +default table_priv_rev user2 USER SELECT true -1 user1 +PREHOOK: query: -- start revoking -- +-- revoke update privilege from user2 +REVOKE UPDATE ON TABLE table_priv_rev FROM USER user2 +PREHOOK: type: REVOKE_PRIVILEGE +PREHOOK: Output: default@table_priv_rev +POSTHOOK: query: -- start revoking -- +-- revoke update privilege from user2 +REVOKE UPDATE ON TABLE table_priv_rev FROM USER user2 +POSTHOOK: type: REVOKE_PRIVILEGE +POSTHOOK: Output: default@table_priv_rev +PREHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +POSTHOOK: type: SHOW_GRANT +default table_priv_rev user2 USER DELETE false -1 user1 +default table_priv_rev user2 USER INSERT false -1 user1 +default table_priv_rev user2 USER SELECT true -1 user1 +PREHOOK: query: -- revoke DELETE privilege from user2 +REVOKE DELETE ON TABLE table_priv_rev FROM USER user2 +PREHOOK: type: REVOKE_PRIVILEGE +PREHOOK: Output: default@table_priv_rev +POSTHOOK: query: -- revoke DELETE privilege from user2 +REVOKE DELETE ON TABLE table_priv_rev FROM USER user2 +POSTHOOK: type: REVOKE_PRIVILEGE +POSTHOOK: Output: default@table_priv_rev +PREHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +POSTHOOK: type: SHOW_GRANT +default table_priv_rev user2 USER INSERT false -1 user1 +default table_priv_rev user2 USER SELECT true -1 user1 +PREHOOK: query: -- revoke insert privilege from user2 +REVOKE INSERT ON TABLE table_priv_rev FROM USER user2 +PREHOOK: 
type: REVOKE_PRIVILEGE +PREHOOK: Output: default@table_priv_rev +POSTHOOK: query: -- revoke insert privilege from user2 +REVOKE INSERT ON TABLE table_priv_rev FROM USER user2 +POSTHOOK: type: REVOKE_PRIVILEGE +POSTHOOK: Output: default@table_priv_rev +PREHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +POSTHOOK: type: SHOW_GRANT +default table_priv_rev user2 USER SELECT true -1 user1 +PREHOOK: query: -- revoke select privilege from user2 +REVOKE SELECT ON TABLE table_priv_rev FROM USER user2 +PREHOOK: type: REVOKE_PRIVILEGE +PREHOOK: Output: default@table_priv_rev +POSTHOOK: query: -- revoke select privilege from user2 +REVOKE SELECT ON TABLE table_priv_rev FROM USER user2 +POSTHOOK: type: REVOKE_PRIVILEGE +POSTHOOK: Output: default@table_priv_rev +PREHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +PREHOOK: type: SHOW_GRANT +POSTHOOK: query: SHOW GRANT USER user2 ON TABLE table_priv_rev +POSTHOOK: type: SHOW_GRANT diff --git a/ql/src/test/results/clientpositive/authorization_set_show_current_role.q.out b/ql/src/test/results/clientpositive/authorization_set_show_current_role.q.out new file mode 100644 index 0000000..11f782a --- /dev/null +++ b/ql/src/test/results/clientpositive/authorization_set_show_current_role.q.out @@ -0,0 +1,49 @@ +PREHOOK: query: show current roles +PREHOOK: type: SHOW_ROLES +POSTHOOK: query: show current roles +POSTHOOK: type: SHOW_ROLES +PUBLIC + +PREHOOK: query: create role r1 +PREHOOK: type: CREATEROLE +POSTHOOK: query: create role r1 +POSTHOOK: type: CREATEROLE +PREHOOK: query: grant role r1 to user hive_test_user +PREHOOK: type: GRANT_ROLE +POSTHOOK: query: grant role r1 to user hive_test_user +POSTHOOK: type: GRANT_ROLE +PREHOOK: query: set role r1 +PREHOOK: type: SHOW_ROLES +POSTHOOK: query: set role r1 +POSTHOOK: type: SHOW_ROLES +PREHOOK: query: show current roles +PREHOOK: type: SHOW_ROLES +POSTHOOK: query: show current roles +POSTHOOK: type: SHOW_ROLES +r1 + +PREHOOK: query: set role PUBLIC +PREHOOK: type: SHOW_ROLES +POSTHOOK: query: set role PUBLIC +POSTHOOK: type: SHOW_ROLES +PREHOOK: query: show current roles +PREHOOK: type: SHOW_ROLES +POSTHOOK: query: show current roles +POSTHOOK: type: SHOW_ROLES +PUBLIC + +PREHOOK: query: set role NONE +PREHOOK: type: SHOW_ROLES +POSTHOOK: query: set role NONE +POSTHOOK: type: SHOW_ROLES +PREHOOK: query: show current roles +PREHOOK: type: SHOW_ROLES +POSTHOOK: query: show current roles +POSTHOOK: type: SHOW_ROLES +r1 +PUBLIC + +PREHOOK: query: drop role r1 +PREHOOK: type: DROPROLE +POSTHOOK: query: drop role r1 +POSTHOOK: type: DROPROLE diff --git a/ql/src/test/results/clientpositive/parquet_create.q.out b/ql/src/test/results/clientpositive/parquet_create.q.out new file mode 100644 index 0000000..34fdea2 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_create.q.out @@ -0,0 +1,206 @@ +PREHOOK: query: DROP TABLE parquet_create_staging +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE parquet_create_staging +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE parquet_create +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE parquet_create +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE parquet_create_staging ( + id int, + str string, + mp MAP, + lst ARRAY, + strct STRUCT +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE parquet_create_staging ( + id 
int, + str string, + mp MAP, + lst ARRAY, + strct STRUCT +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@parquet_create_staging +PREHOOK: query: CREATE TABLE parquet_create ( + id int, + str string, + mp MAP, + lst ARRAY, + strct STRUCT +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE parquet_create ( + id int, + str string, + mp MAP, + lst ARRAY, + strct STRUCT +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@parquet_create +PREHOOK: query: DESCRIBE FORMATTED parquet_create +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED parquet_create +POSTHOOK: type: DESCTABLE +# col_name data_type comment + +id int from deserializer +str string from deserializer +mp map from deserializer +lst array from deserializer +strct struct from deserializer + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe +InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_create.txt' OVERWRITE INTO TABLE parquet_create_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@parquet_create_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_create.txt' OVERWRITE INTO TABLE parquet_create_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@parquet_create_staging +PREHOOK: query: SELECT * FROM parquet_create_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_create_staging +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_create_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_create_staging +#### A masked pattern was here #### +1 foo line1 {"key11":"value11","key12":"value12","key13":"value13"} ["a","b","c"] {"a":"one","b":"two"} +2 bar line2 {"key21":"value21","key22":"value22","key23":"value23"} ["d","e","f"] {"a":"three","b":"four"} +3 baz line3 {"key31":"value31","key32":"value32","key33":"value33"} ["g","h","i"] {"a":"five","b":"six"} +PREHOOK: query: INSERT OVERWRITE TABLE parquet_create SELECT * FROM parquet_create_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_create_staging +PREHOOK: Output: default@parquet_create +POSTHOOK: query: INSERT OVERWRITE TABLE parquet_create SELECT * FROM parquet_create_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_create_staging +POSTHOOK: Output: default@parquet_create +POSTHOOK: Lineage: parquet_create.id SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_create.lst SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:lst, type:array, comment:null), ] +POSTHOOK: Lineage: parquet_create.mp SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:mp, type:map, comment:null), ] +POSTHOOK: Lineage: parquet_create.str SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:str, type:string, 
comment:null), ] +POSTHOOK: Lineage: parquet_create.strct SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:strct, type:struct, comment:null), ] +PREHOOK: query: SELECT * FROM parquet_create group by id +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_create +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_create group by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_create +#### A masked pattern was here #### +POSTHOOK: Lineage: parquet_create.id SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_create.lst SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:lst, type:array, comment:null), ] +POSTHOOK: Lineage: parquet_create.mp SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:mp, type:map, comment:null), ] +POSTHOOK: Lineage: parquet_create.str SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_create.strct SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:strct, type:struct, comment:null), ] +1 +2 +3 +PREHOOK: query: SELECT id, count(0) FROM parquet_create group by id +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_create +#### A masked pattern was here #### +POSTHOOK: query: SELECT id, count(0) FROM parquet_create group by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_create +#### A masked pattern was here #### +POSTHOOK: Lineage: parquet_create.id SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_create.lst SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:lst, type:array, comment:null), ] +POSTHOOK: Lineage: parquet_create.mp SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:mp, type:map, comment:null), ] +POSTHOOK: Lineage: parquet_create.str SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_create.strct SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:strct, type:struct, comment:null), ] +1 1 +2 1 +3 1 +PREHOOK: query: SELECT str from parquet_create +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_create +#### A masked pattern was here #### +POSTHOOK: query: SELECT str from parquet_create +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_create +#### A masked pattern was here #### +POSTHOOK: Lineage: parquet_create.id SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_create.lst SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:lst, type:array, comment:null), ] +POSTHOOK: Lineage: parquet_create.mp SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:mp, type:map, comment:null), ] +POSTHOOK: Lineage: parquet_create.str SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_create.strct SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:strct, type:struct, comment:null), ] +foo line1 +bar line2 +baz line3 +PREHOOK: query: SELECT mp from parquet_create +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_create +#### A masked pattern was here #### +POSTHOOK: query: SELECT mp from parquet_create +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@parquet_create +#### A masked pattern was here #### +POSTHOOK: Lineage: parquet_create.id SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_create.lst SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:lst, type:array, comment:null), ] +POSTHOOK: Lineage: parquet_create.mp SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:mp, type:map, comment:null), ] +POSTHOOK: Lineage: parquet_create.str SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_create.strct SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:strct, type:struct, comment:null), ] +{"key12":"value12","key11":"value11","key13":"value13"} +{"key21":"value21","key23":"value23","key22":"value22"} +{"key33":"value33","key31":"value31","key32":"value32"} +PREHOOK: query: SELECT lst from parquet_create +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_create +#### A masked pattern was here #### +POSTHOOK: query: SELECT lst from parquet_create +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_create +#### A masked pattern was here #### +POSTHOOK: Lineage: parquet_create.id SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_create.lst SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:lst, type:array, comment:null), ] +POSTHOOK: Lineage: parquet_create.mp SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:mp, type:map, comment:null), ] +POSTHOOK: Lineage: parquet_create.str SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_create.strct SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:strct, type:struct, comment:null), ] +["a","b","c"] +["d","e","f"] +["g","h","i"] +PREHOOK: query: SELECT strct from parquet_create +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_create +#### A masked pattern was here #### +POSTHOOK: query: SELECT strct from parquet_create +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_create +#### A masked pattern was here #### +POSTHOOK: Lineage: parquet_create.id SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_create.lst SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:lst, type:array, comment:null), ] +POSTHOOK: Lineage: parquet_create.mp SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:mp, type:map, comment:null), ] +POSTHOOK: Lineage: parquet_create.str SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_create.strct SIMPLE [(parquet_create_staging)parquet_create_staging.FieldSchema(name:strct, type:struct, comment:null), ] +{"a":"one","b":"two"} +{"a":"three","b":"four"} +{"a":"five","b":"six"} diff --git a/ql/src/test/results/clientpositive/parquet_partitioned.q.out b/ql/src/test/results/clientpositive/parquet_partitioned.q.out new file mode 100644 index 0000000..ecba6ce --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_partitioned.q.out @@ -0,0 +1,174 @@ +PREHOOK: query: DROP TABLE parquet_partitioned_staging +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE 
parquet_partitioned_staging +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE parquet_partitioned +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE parquet_partitioned +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE parquet_partitioned_staging ( + id int, + str string, + part string +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE parquet_partitioned_staging ( + id int, + str string, + part string +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@parquet_partitioned_staging +PREHOOK: query: CREATE TABLE parquet_partitioned ( + id int, + str string +) PARTITIONED BY (part string) +STORED AS PARQUET +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE parquet_partitioned ( + id int, + str string +) PARTITIONED BY (part string) +STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@parquet_partitioned +PREHOOK: query: DESCRIBE FORMATTED parquet_partitioned +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED parquet_partitioned +POSTHOOK: type: DESCTABLE +# col_name data_type comment + +id int from deserializer +str string from deserializer + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe +InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partitioned.txt' OVERWRITE INTO TABLE parquet_partitioned_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@parquet_partitioned_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partitioned.txt' OVERWRITE INTO TABLE parquet_partitioned_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@parquet_partitioned_staging +PREHOOK: query: SELECT * FROM parquet_partitioned_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_partitioned_staging +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_partitioned_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_partitioned_staging +#### A masked pattern was here #### +1 foo part1 +2 bar part2 +3 baz part2 +PREHOOK: query: INSERT OVERWRITE TABLE parquet_partitioned PARTITION (part) SELECT * FROM parquet_partitioned_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_partitioned_staging +PREHOOK: Output: default@parquet_partitioned +POSTHOOK: query: INSERT OVERWRITE TABLE parquet_partitioned PARTITION (part) SELECT * FROM parquet_partitioned_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_partitioned_staging +POSTHOOK: Output: default@parquet_partitioned@part=part1 +POSTHOOK: Output: default@parquet_partitioned@part=part2 +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part1).id SIMPLE [(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part1).str SIMPLE 
[(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part2).id SIMPLE [(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part2).str SIMPLE [(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:str, type:string, comment:null), ] +PREHOOK: query: SELECT * FROM parquet_partitioned +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_partitioned +PREHOOK: Input: default@parquet_partitioned@part=part1 +PREHOOK: Input: default@parquet_partitioned@part=part2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_partitioned +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_partitioned +POSTHOOK: Input: default@parquet_partitioned@part=part1 +POSTHOOK: Input: default@parquet_partitioned@part=part2 +#### A masked pattern was here #### +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part1).id SIMPLE [(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part1).str SIMPLE [(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part2).id SIMPLE [(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part2).str SIMPLE [(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:str, type:string, comment:null), ] +1 foo part1 +2 bar part2 +3 baz part2 +PREHOOK: query: SELECT part, COUNT(0) FROM parquet_partitioned GROUP BY part +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_partitioned +PREHOOK: Input: default@parquet_partitioned@part=part1 +PREHOOK: Input: default@parquet_partitioned@part=part2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT part, COUNT(0) FROM parquet_partitioned GROUP BY part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_partitioned +POSTHOOK: Input: default@parquet_partitioned@part=part1 +POSTHOOK: Input: default@parquet_partitioned@part=part2 +#### A masked pattern was here #### +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part1).id SIMPLE [(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part1).str SIMPLE [(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part2).id SIMPLE [(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part2).str SIMPLE [(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:str, type:string, comment:null), ] +part1 1 +part2 2 +PREHOOK: query: SELECT * FROM parquet_partitioned +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_partitioned +PREHOOK: Input: default@parquet_partitioned@part=part1 +PREHOOK: Input: default@parquet_partitioned@part=part2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_partitioned +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_partitioned +POSTHOOK: Input: default@parquet_partitioned@part=part1 
+POSTHOOK: Input: default@parquet_partitioned@part=part2 +#### A masked pattern was here #### +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part1).id SIMPLE [(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part1).str SIMPLE [(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part2).id SIMPLE [(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part2).str SIMPLE [(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:str, type:string, comment:null), ] +1 foo part1 +2 bar part2 +3 baz part2 +PREHOOK: query: SELECT part, COUNT(0) FROM parquet_partitioned GROUP BY part +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_partitioned +PREHOOK: Input: default@parquet_partitioned@part=part1 +PREHOOK: Input: default@parquet_partitioned@part=part2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT part, COUNT(0) FROM parquet_partitioned GROUP BY part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_partitioned +POSTHOOK: Input: default@parquet_partitioned@part=part1 +POSTHOOK: Input: default@parquet_partitioned@part=part2 +#### A masked pattern was here #### +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part1).id SIMPLE [(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part1).str SIMPLE [(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part2).id SIMPLE [(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_partitioned PARTITION(part=part2).str SIMPLE [(parquet_partitioned_staging)parquet_partitioned_staging.FieldSchema(name:str, type:string, comment:null), ] +part1 1 +part2 2 diff --git a/service/src/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java b/service/src/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java index d00db1c..e973f83 100644 --- a/service/src/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java +++ b/service/src/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java @@ -51,10 +51,9 @@ public static ExecuteStatementOperation newExecuteStatementOperation( HiveSession parentSession, String statement, Map<String, String> confOverlay, boolean runAsync) throws HiveSQLException { String[] tokens = statement.trim().split("\\s+"); - String command = tokens[0].toLowerCase(); CommandProcessor processor = null; try { - processor = CommandProcessorFactory.getForHiveCommand(tokens[0], parentSession.getHiveConf()); + processor = CommandProcessorFactory.getForHiveCommand(tokens, parentSession.getHiveConf()); } catch (SQLException e) { throw new HiveSQLException(e.getMessage(), e.getSQLState(), e); }
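
A minimal usage sketch of the CommandProcessorFactory entry point changed in the CliDriver and ExecuteStatementOperation hunks: the factory now receives the full token array instead of only tokens[0]. This is an illustration only, not part of the patch; the class name CommandDispatchSketch is hypothetical, the import package paths are assumed from the usual Hive source layout, and the convention that a null result means the statement is plain HiveQL (handled by the regular query path) is an assumption rather than something stated here.

import java.sql.SQLException;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.processors.CommandProcessor;
import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory;

public class CommandDispatchSketch {
  // Resolve a statement to a CommandProcessor using the tokens-based API.
  public static void dispatch(String statement, HiveConf conf) throws SQLException {
    // Same tokenization used by the patched ExecuteStatementOperation.
    String[] tokens = statement.trim().split("\\s+");
    // The factory now sees every token, so multi-word commands
    // (for example "set role r1") can be matched on more than the first word.
    CommandProcessor processor = CommandProcessorFactory.getForHiveCommand(tokens, conf);
    if (processor == null) {
      // Assumed behavior: null means this is ordinary HiveQL and should be
      // handed to the regular query path (Driver / SQLOperation).
      System.out.println("Plain HiveQL, not a Hive command: " + statement);
    } else {
      System.out.println("Hive command '" + tokens[0] + "' handled by "
          + processor.getClass().getSimpleName());
    }
  }
}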