Index: ql/src/test/results/clientpositive/input43.q.out =================================================================== --- ql/src/test/results/clientpositive/input43.q.out (revision 0) +++ ql/src/test/results/clientpositive/input43.q.out (revision 0) @@ -0,0 +1,10 @@ +PREHOOK: query: load data local inpath '../data/files/kv1.txt' into table src1 +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/kv1.txt' into table src1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@src1 +PREHOOK: query: load data local inpath '../data/files/kv1.txt' into table src1 +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/kv1.txt' into table src1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@src1 Index: ql/src/test/queries/clientpositive/input43.q =================================================================== --- ql/src/test/queries/clientpositive/input43.q (revision 0) +++ ql/src/test/queries/clientpositive/input43.q (revision 0) @@ -0,0 +1,2 @@ +load data local inpath '../data/files/kv1.txt' into table src1 ; +load data local inpath '../data/files/kv1.txt' into table src1 ; Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (revision 1006076) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (working copy) @@ -18,16 +18,6 @@ package org.apache.hadoop.hive.ql.metadata; -import static org.apache.hadoop.hive.metastore.MetaStoreUtils.DEFAULT_DATABASE_NAME; -import static org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_STORAGE; -import static org.apache.hadoop.hive.serde.Constants.COLLECTION_DELIM; -import static org.apache.hadoop.hive.serde.Constants.ESCAPE_CHAR; -import static org.apache.hadoop.hive.serde.Constants.FIELD_DELIM; -import static org.apache.hadoop.hive.serde.Constants.LINE_DELIM; -import static org.apache.hadoop.hive.serde.Constants.MAPKEY_DELIM; -import static org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT; -import static org.apache.hadoop.hive.serde.Constants.STRING_TYPE_NAME; - import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; @@ -52,7 +42,6 @@ import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; -import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Index; import org.apache.hadoop.hive.metastore.api.InvalidOperationException; @@ -62,6 +51,7 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.index.HiveIndexHandler; +import org.apache.hadoop.hive.serde.Constants; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; @@ -82,7 +72,6 @@ private HiveConf conf = null; private IMetaStoreClient metaStoreClient; - private String currentDatabase; private static ThreadLocal hiveDB = new ThreadLocal() { @Override @@ -183,69 +172,6 @@ } /** - * Create a database - * @param db - * @param ifNotExist if true, will ignore AlreadyExistsException exception - * @throws AlreadyExistsException - * @throws HiveException - */ - public void createDatabase(Database db, boolean ifNotExist) - throws AlreadyExistsException, HiveException { - try { - getMSC().createDatabase(db); - } catch (AlreadyExistsException e) { - if (!ifNotExist) { - throw e; - } - } catch (Exception e) { - throw new HiveException(e); - } - } - - /** - * Create a Database. Raise an error if a database with the same name already exists. - * @param db - * @throws AlreadyExistsException - * @throws HiveException - */ - public void createDatabase(Database db) throws AlreadyExistsException, HiveException { - createDatabase(db, false); - } - - /** - * Drop a database. - * @param name - * @throws NoSuchObjectException - * @throws HiveException - * @see org.apache.hadoop.hive.metastore.HiveMetaStoreClient#dropDatabase(java.lang.String) - */ - public void dropDatabase(String name) throws HiveException, NoSuchObjectException { - dropDatabase(name, true, false); - } - - - /** - * Drop a database - * @param name - * @param deleteData - * @param ignoreUnknownDb if true, will ignore NoSuchObjectException - * @return - * @throws HiveException - * @throws NoSuchObjectException - */ - public void dropDatabase(String name, boolean deleteData, boolean ignoreUnknownDb) - throws HiveException, NoSuchObjectException { - try { - getMSC().dropDatabase(name, deleteData, ignoreUnknownDb); - } catch (NoSuchObjectException e) { - throw e; - } catch (Exception e) { - throw new HiveException(e); - } - } - - - /** * Creates a table metdata and the directory for the table data * * @param tableName @@ -297,12 +223,13 @@ throw new HiveException("columns not specified for table " + tableName); } - Table tbl = new Table(getCurrentDatabase(), tableName); + Table tbl = new Table(tableName); tbl.setInputFormatClass(fileInputFormat.getName()); tbl.setOutputFormatClass(fileOutputFormat.getName()); for (String col : columns) { - FieldSchema field = new FieldSchema(col, STRING_TYPE_NAME, "default"); + FieldSchema field = new FieldSchema(col, + org.apache.hadoop.hive.serde.Constants.STRING_TYPE_NAME, "default"); tbl.getCols().add(field); } @@ -310,7 +237,9 @@ for (String partCol : partCols) { FieldSchema part = new FieldSchema(); part.setName(partCol); - part.setType(STRING_TYPE_NAME); // default partition key + part.setType(org.apache.hadoop.hive.serde.Constants.STRING_TYPE_NAME); // default + // partition + // key tbl.getPartCols().add(part); } } @@ -334,7 +263,8 @@ public void alterTable(String tblName, Table newTbl) throws InvalidOperationException, HiveException { try { - getMSC().alter_table(getCurrentDatabase(), tblName, newTbl.getTTable()); + getMSC().alter_table(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, + newTbl.getTTable()); } catch (MetaException e) { throw new HiveException("Unable to alter table.", e); } catch (TException e) { @@ -356,7 +286,7 @@ public void alterPartition(String tblName, Partition newPart) throws InvalidOperationException, HiveException { try { - getMSC().alter_partition(getCurrentDatabase(), tblName, + getMSC().alter_partition(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, newPart.getTPartition()); } catch (MetaException e) { @@ -388,9 +318,6 @@ */ public void createTable(Table tbl, boolean ifNotExists) throws HiveException { try { - if (tbl.getDbName() == null || "".equals(tbl.getDbName().trim())) { - tbl.setDbName(getCurrentDatabase()); - } if (tbl.getCols().size() == 0) { tbl.setFields(MetaStoreUtils.getFieldsFromDeserializer(tbl.getTableName(), tbl.getDeserializer())); @@ -407,41 +334,22 @@ } /** - * - * @param tableName - * table name + * Creates the index with the given objects + * * @param indexName - * index name + * the index name + * @param tableName + * the table name that this index is built on * @param indexHandlerClass - * index handler class + * the index handler's class name * @param indexedCols - * index columns - * @param indexTblName - * index table's name - * @param deferredRebuild - * referred build index table's data * @param inputFormat - * input format * @param outputFormat - * output format * @param serde - * @param storageHandler - * index table's storage handler - * @param location - * location - * @param idxProps - * idx - * @param serdeProps - * serde properties - * @param collItemDelim - * @param fieldDelim - * @param fieldEscape - * @param lineDelim - * @param mapKeyDelim * @throws HiveException */ public void createIndex(String tableName, String indexName, String indexHandlerClass, - List indexedCols, String indexTblName, boolean deferredRebuild, + List indexedCols, String indexTblName, boolean autoRebuild, String inputFormat, String outputFormat, String serde, String storageHandler, String location, Map idxProps, Map serdeProps, @@ -450,23 +358,23 @@ throws HiveException { try { - String dbName = getCurrentDatabase(); + String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; Index old_index = null; try { - old_index = getIndex(dbName, tableName, indexName); + old_index = getIndex(dbName, tableName, indexName); } catch (Exception e) { } if (old_index != null) { throw new HiveException("Index " + indexName + " already exists on table " + tableName + ", db=" + dbName); } - + org.apache.hadoop.hive.metastore.api.Table baseTbl = getMSC().getTable(dbName, tableName); if (baseTbl.getTableType() == TableType.VIRTUAL_VIEW.toString()) { throw new HiveException("tableName="+ tableName +" is a VIRTUAL VIEW. Index on VIRTUAL VIEW is not supported."); } - + if (indexTblName == null) { - indexTblName = MetaStoreUtils.getIndexTableName(dbName, tableName, indexName); + indexTblName = MetaStoreUtils.getIndexTableName(dbName, tableName, indexName); } else { org.apache.hadoop.hive.metastore.api.Table temp = null; try { @@ -477,11 +385,11 @@ throw new HiveException("Table name " + indexTblName + " already exists. Choose another name."); } } - + org.apache.hadoop.hive.metastore.api.StorageDescriptor storageDescriptor = baseTbl.getSd().clone(); SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo(); if(serde != null) { - serdeInfo.setSerializationLib(serde); + serdeInfo.setSerializationLib(serde); } else { if (storageHandler == null) { serdeInfo.setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName()); @@ -493,22 +401,24 @@ } if (fieldDelim != null) { - serdeInfo.getParameters().put(FIELD_DELIM, fieldDelim); - serdeInfo.getParameters().put(SERIALIZATION_FORMAT, fieldDelim); + serdeInfo.getParameters().put(Constants.FIELD_DELIM, fieldDelim); + serdeInfo.getParameters().put(Constants.SERIALIZATION_FORMAT, + fieldDelim); } if (fieldEscape != null) { - serdeInfo.getParameters().put(ESCAPE_CHAR, fieldEscape); + serdeInfo.getParameters().put(Constants.ESCAPE_CHAR, fieldEscape); } if (collItemDelim != null) { - serdeInfo.getParameters().put(COLLECTION_DELIM, collItemDelim); + serdeInfo.getParameters() + .put(Constants.COLLECTION_DELIM, collItemDelim); } if (mapKeyDelim != null) { - serdeInfo.getParameters().put(MAPKEY_DELIM, mapKeyDelim); + serdeInfo.getParameters().put(Constants.MAPKEY_DELIM, mapKeyDelim); } if (lineDelim != null) { - serdeInfo.getParameters().put(LINE_DELIM, lineDelim); + serdeInfo.getParameters().put(Constants.LINE_DELIM, lineDelim); } - + if (serdeProps != null) { Iterator> iter = serdeProps.entrySet() .iterator(); @@ -517,16 +427,16 @@ serdeInfo.getParameters().put(m.getKey(), m.getValue()); } } - + storageDescriptor.setLocation(null); if (location != null) { - storageDescriptor.setLocation(location); + storageDescriptor.setLocation(location); } storageDescriptor.setInputFormat(inputFormat); storageDescriptor.setOutputFormat(outputFormat); - + Map params = new HashMap(); - + List indexTblCols = new ArrayList(); List sortCols = new ArrayList(); storageDescriptor.setBucketCols(null); @@ -539,40 +449,35 @@ k++; } } - if (k != indexedCols.size()) { + if (k != indexedCols.size()) throw new RuntimeException( "Check the index columns, they should appear in the table being indexed."); - } - + storageDescriptor.setCols(indexTblCols); storageDescriptor.setSortCols(sortCols); - int time = (int) (System.currentTimeMillis() / 1000); + int time = (int) (System.currentTimeMillis() / 1000); org.apache.hadoop.hive.metastore.api.Table tt = null; HiveIndexHandler indexHandler = HiveUtils.getIndexHandler(this.getConf(), indexHandlerClass); if (indexHandler.usesIndexTable()) { - tt = new org.apache.hadoop.hive.ql.metadata.Table(dbName, indexTblName).getTTable(); + tt = new org.apache.hadoop.hive.ql.metadata.Table(indexTblName).getTTable(); List partKeys = baseTbl.getPartitionKeys(); tt.setPartitionKeys(partKeys); tt.setTableType(TableType.INDEX_TABLE.toString()); } - if(!deferredRebuild) { - throw new RuntimeException("Please specify deferred rebuild using \" WITH DEFERRED REBUILD \"."); - } - Index indexDesc = new Index(indexName, indexHandlerClass, dbName, tableName, time, time, indexTblName, - storageDescriptor, params, deferredRebuild); + storageDescriptor, params, autoRebuild, tt); indexHandler.analyzeIndexDefinition(baseTbl, indexDesc, tt); - - this.getMSC().createIndex(indexDesc, tt); - + + this.getMSC().createIndex(indexDesc); + } catch (Exception e) { throw new HiveException(e); } } - + public Index getIndex(String dbName, String baseTableName, String indexName) throws HiveException { try { @@ -581,7 +486,7 @@ throw new HiveException(e); } } - + public boolean dropIndex(String db_name, String tbl_name, String index_name, boolean deleteData) throws HiveException { try { return getMSC().dropIndex(db_name, tbl_name, index_name, deleteData); @@ -591,7 +496,7 @@ throw new HiveException("Unknow error. Please check logs.", e); } } - + /** * Drops table along with the data in it. If the table doesn't exist * then it is a no-op @@ -608,26 +513,6 @@ * @throws HiveException * thrown if the drop fails */ - public void dropTable(String tableName) throws HiveException { - dropTable(getCurrentDatabase(), tableName, true, true); - } - - /** - * Drops table along with the data in it. If the table doesn't exist - * then it is a no-op - * @param dbName database where the table lives - * @param tableName table to drop - * @throws HiveException thrown if the drop fails - * Drops table along with the data in it. If the table doesn't exist then it - * is a no-op - * - * @param dbName - * database where the table lives - * @param tableName - * table to drop - * @throws HiveException - * thrown if the drop fails - */ public void dropTable(String dbName, String tableName) throws HiveException { dropTable(dbName, tableName, true, true); } @@ -661,18 +546,7 @@ } /** - * Returns metadata for the table named tableName in the current database. - * @param tableName the name of the table - * @return - * @throws HiveException if there's an internal error or if the - * table doesn't exist - */ - public Table getTable(final String tableName) throws HiveException { - return this.getTable(getCurrentDatabase(), tableName, true); - } - - /** - * Returns metadata of the table + * Returns metadata of the table. * * @param dbName * the name of the database @@ -682,7 +556,9 @@ * @exception HiveException * if there's an internal error or if the table doesn't exist */ - public Table getTable(final String dbName, final String tableName) throws HiveException { + public Table getTable(final String dbName, final String tableName) + throws HiveException { + return this.getTable(dbName, tableName, true); } @@ -723,11 +599,12 @@ if (!TableType.VIRTUAL_VIEW.toString().equals(tTable.getTableType())) { // Fix the non-printable chars Map parameters = tTable.getSd().getParameters(); - String sf = parameters.get(SERIALIZATION_FORMAT); + String sf = parameters.get(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT); if (sf != null) { char[] b = sf.toCharArray(); if ((b.length == 1) && (b[0] < 10)) { // ^A, ^B, ^C, ^D, \t - parameters.put(SERIALIZATION_FORMAT, Integer.toString(b[0])); + parameters.put(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, + Integer.toString(b[0])); } } @@ -753,27 +630,12 @@ return table; } - /** - * Get all table names for the current database. - * @return List of table names - * @throws HiveException - */ public List getAllTables() throws HiveException { - return getAllTables(getCurrentDatabase()); + return getTablesByPattern(".*"); } /** - * Get all table names for the specified database. - * @param dbName - * @return List of table names - * @throws HiveException - */ - public List getAllTables(String dbName) throws HiveException { - return getTablesByPattern(dbName, ".*"); - } - - /** - * Returns all existing tables from default database which match the given + * returns all existing tables from default database which match the given * pattern. The matching occurs as per Java regular expressions * * @param tablePattern @@ -781,28 +643,13 @@ * @return list of table names * @throws HiveException */ - public List getTablesByPattern(String tablePattern) throws HiveException { - return getTablesByPattern(getCurrentDatabase(), tablePattern); + public List getTablesByPattern(String tablePattern) + throws HiveException { + return getTablesForDb(MetaStoreUtils.DEFAULT_DATABASE_NAME, tablePattern); } /** - * Returns all existing tables from the specified database which match the given - * pattern. The matching occurs as per Java regular expressions. - * @param dbName - * @param tablePattern - * @return list of table names - * @throws HiveException - */ - public List getTablesByPattern(String dbName, String tablePattern) throws HiveException { - try { - return getMSC().getTables(dbName, tablePattern); - } catch (Exception e) { - throw new HiveException(e); - } - } - - /** - * Returns all existing tables from the given database which match the given + * returns all existing tables from the given database which match the given * pattern. The matching occurs as per Java regular expressions * * @param database @@ -822,59 +669,32 @@ } /** - * Get all existing database names. - * - * @return List of database names. - * @throws HiveException + * @param name + * @param locationUri + * @return true or false + * @throws AlreadyExistsException + * @throws MetaException + * @throws TException + * @see org.apache.hadoop.hive.metastore.HiveMetaStoreClient#createDatabase(java.lang.String, + * java.lang.String) */ - public List getAllDatabases() throws HiveException { - try { - return getMSC().getAllDatabases(); - } catch (Exception e) { - throw new HiveException(e); - } + protected boolean createDatabase(String name, String locationUri) + throws AlreadyExistsException, MetaException, TException { + return getMSC().createDatabase(name, locationUri); } /** - * Get all existing databases that match the given - * pattern. The matching occurs as per Java regular expressions - * - * @param databasePattern - * java re pattern - * @return list of database names - * @throws HiveException + * @param name + * @return true or false + * @throws MetaException + * @throws TException + * @see org.apache.hadoop.hive.metastore.HiveMetaStoreClient#dropDatabase(java.lang.String) */ - public List getDatabasesByPattern(String databasePattern) throws HiveException { - try { - return getMSC().getDatabases(databasePattern); - } catch (Exception e) { - throw new HiveException(e); - } + protected boolean dropDatabase(String name) throws MetaException, TException { + return getMSC().dropDatabase(name); } /** - * Query metadata to see if a database with the given name already exists. - * - * @param dbName - * @return true if a database with the given name already exists, false if - * does not exist. - * @throws HiveException - */ - public boolean databaseExists(String dbName) throws HiveException { - try { - if (null != getMSC().getDatabase(dbName)) { - return true; - } - return false; - } catch (NoSuchObjectException e) { - return false; - } catch (Exception e) { - throw new HiveException(e); - } - } - - - /** * Load a directory into a Hive Table Partition - Alters existing content of * the partition with the contents of loadPath. - If he partition does not * exist - one is created - files in loadPath are moved into Hive. But the @@ -896,7 +716,7 @@ Map partSpec, boolean replace, Path tmpDirPath, boolean holdDDLTime) throws HiveException { - Table tbl = getTable(tableName); + Table tbl = getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); try { /** * Move files before creating the partition since down stream processes @@ -969,9 +789,6 @@ FileSystem fs = loadPath.getFileSystem(conf); FileStatus[] status = Utilities.getFileStatusRecurse(loadPath, numDP, fs); - if (status.length == 0) { - LOG.warn("No partition is genereated by dynamic partitioning"); - } if (status.length > conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS)) { throw new HiveException("Number of dynamic partitions created is " + status.length @@ -1021,7 +838,7 @@ */ public void loadTable(Path loadPath, String tableName, boolean replace, Path tmpDirPath, boolean holdDDLTime) throws HiveException { - Table tbl = getTable(tableName); + Table tbl = getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); if (replace) { tbl.replaceFiles(loadPath, tmpDirPath); @@ -1278,47 +1095,58 @@ return qlPartitions; } - /** - * Get the name of the current database - * @return - */ - public String getCurrentDatabase() { - if (null == currentDatabase) { - currentDatabase = DEFAULT_DATABASE_NAME; - } - return currentDatabase; - } - - /** - * Set the name of the current database - * @param currentDatabase - */ - public void setCurrentDatabase(String currentDatabase) { - this.currentDatabase = currentDatabase; - } - static private void checkPaths(FileSystem fs, FileStatus[] srcs, Path destf, boolean replace) throws HiveException { try { for (FileStatus src : srcs) { FileStatus[] items = fs.listStatus(src.getPath()); for (FileStatus item : items) { + Path itemStaging = item.getPath(); if (Utilities.isTempPath(item)) { // This check is redundant because temp files are removed by // execution layer before // calling loadTable/Partition. But leaving it in just in case. - fs.delete(item.getPath(), true); + fs.delete(itemStaging, true); continue; } if (item.isDir()) { throw new HiveException("checkPaths: " + src.getPath() - + " has nested directory" + item.getPath()); + + " has nested directory" + itemStaging); } - Path tmpDest = new Path(destf, item.getPath().getName()); - if (!replace && fs.exists(tmpDest)) { - throw new HiveException("checkPaths: " + tmpDest - + " already exists"); + if (!replace) { + // It's possible that the file we're copying may have the same + // relative name as an existing file in the "destf" directory. + // So let's make a quick check to see if we can rename any + // potential offenders so as to allow them to move into the + // "destf" directory. The scheme is dead simple: simply tack + // on "_copy_N" where N starts at 1 and works its way up until + // we find a free space. + + // Note: there are race conditions here, but I don't believe + // they're worse than what was already present. + int counter = 1; + Path itemDest = new Path(destf, itemStaging.getName()); + + while (fs.exists(itemDest)) { + Path proposedStaging = itemStaging.suffix("_copy_" + counter++); + Path proposedDest = new Path(destf, proposedStaging.getName()); + + if (fs.exists(proposedDest)) { + // There's already a file in our destination directory with our + // _copy_N suffix. We've been here before... + LOG.trace(proposedDest + " already exists"); + continue; + } + + if (!fs.rename(itemStaging, proposedStaging)) { + LOG.debug("Unsuccessfully in attempt to rename " + itemStaging + " to " + proposedStaging + "..."); + continue; + } + + LOG.debug("Successfully renamed " + itemStaging + " to " + proposedStaging); + itemDest = proposedDest; + } } } } @@ -1389,7 +1217,7 @@ Path tmppath) throws HiveException { FileStatus[] srcs; try { - srcs = fs.listStatus(srcf); + srcs = fs.globStatus(srcf); } catch (IOException e) { throw new HiveException("addFiles: filesystem error in check phase", e); } @@ -1459,8 +1287,10 @@ return null; } HiveStorageHandler storageHandler = - HiveUtils.getStorageHandler(conf, - tbl.getParameters().get(META_TABLE_STORAGE)); + HiveUtils.getStorageHandler( + conf, + tbl.getParameters().get( + org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_STORAGE)); if (storageHandler == null) { return null; }