diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index b0c3d3f..0139d51 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -229,9 +229,11 @@ private void commit(FileSystem fs) throws HiveException { needToRename = false; } } - if (needToRename && outPaths[idx] != null && !fs.rename(outPaths[idx], finalPaths[idx])) { - throw new HiveException("Unable to rename output from: " + - outPaths[idx] + " to: " + finalPaths[idx]); + if (needToRename && outPaths[idx] != null) { + String name = finalPaths[idx].getName(); + for (int counter = 1; !fs.rename(outPaths[idx],finalPaths[idx]); counter++) { + finalPaths[idx] = new Path(finalPaths[idx].getParent(), name + "_copy_" + counter); + } } updateProgress(); } catch (IOException e) { @@ -889,20 +891,26 @@ protected String generateListBucketingDirName(Object row) { skewedValsCandidate.add(posPair.getSkewColPosition(), standObjs.get(posPair.getTblColPosition()).toString()); } - /* The row matches skewed column names. */ - if (allSkewedVals.contains(skewedValsCandidate)) { - /* matches skewed values. */ - lbDirName = FileUtils.makeListBucketingDirName(skewedCols, skewedValsCandidate); - locationMap.put(skewedValsCandidate, lbDirName); - } else { - /* create default directory. */ - lbDirName = FileUtils.makeDefaultListBucketingDirName(skewedCols, - lbCtx.getDefaultDirName()); - List defaultKey = Lists.newArrayList(lbCtx.getDefaultKey()); - if (!locationMap.containsKey(defaultKey)) { - locationMap.put(defaultKey, lbDirName); + + // Get the mapped folder first in case the location is set explicitly. + // Create the listBucketing folder only when it's not set. + lbDirName = locationMap.get(skewedValsCandidate); + if (lbDirName == null) { + if (allSkewedVals.contains(skewedValsCandidate)) { + /* matches skewed values. */ + lbDirName = FileUtils.makeListBucketingDirName(skewedCols, skewedValsCandidate); + locationMap.put(skewedValsCandidate, lbDirName); + } else { + /* create default directory. */ + lbDirName = FileUtils.makeDefaultListBucketingDirName(skewedCols, + lbCtx.getDefaultDirName()); + List defaultKey = Lists.newArrayList(lbCtx.getDefaultKey()); + if (!locationMap.containsKey(defaultKey)) { + locationMap.put(defaultKey, lbDirName); + } } } + return lbDirName; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 9d927bd..9f27c5d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -1563,17 +1563,6 @@ public Partition loadPartition(Path loadPath, Table tbl, //column stats will be inaccurate StatsSetupConst.clearColumnStatsState(newTPart.getParameters()); - // recreate the partition if it existed before - if (isSkewedStoreAsSubdir) { - org.apache.hadoop.hive.metastore.api.Partition newCreatedTpart = newTPart.getTPartition(); - SkewedInfo skewedInfo = newCreatedTpart.getSd().getSkewedInfo(); - /* Construct list bucketing location mappings from sub-directory name. */ - Map, String> skewedColValueLocationMaps = constructListBucketingLocationMap( - newPartPath, skewedInfo); - /* Add list bucketing location mappings. */ - skewedInfo.setSkewedColValueLocationMaps(skewedColValueLocationMaps); - newCreatedTpart.getSd().setSkewedInfo(skewedInfo); - } if (!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { StatsSetupConst.setBasicStatsState(newTPart.getParameters(), StatsSetupConst.FALSE); } @@ -1610,89 +1599,6 @@ public Partition loadPartition(Path loadPath, Table tbl, } /** - * Walk through sub-directory tree to construct list bucketing location map. - * - * @param fSta - * @param fSys - * @param skewedColValueLocationMaps - * @param newPartPath - * @param skewedInfo - * @throws IOException - */ -private void walkDirTree(FileStatus fSta, FileSystem fSys, - Map, String> skewedColValueLocationMaps, Path newPartPath, SkewedInfo skewedInfo) - throws IOException { - /* Base Case. It's leaf. */ - if (!fSta.isDir()) { - /* construct one location map if not exists. */ - constructOneLBLocationMap(fSta, skewedColValueLocationMaps, newPartPath, skewedInfo); - return; - } - - /* dfs. */ - FileStatus[] children = fSys.listStatus(fSta.getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); - if (children != null) { - for (FileStatus child : children) { - walkDirTree(child, fSys, skewedColValueLocationMaps, newPartPath, skewedInfo); - } - } -} - -/** - * Construct a list bucketing location map - * @param fSta - * @param skewedColValueLocationMaps - * @param newPartPath - * @param skewedInfo - */ -private void constructOneLBLocationMap(FileStatus fSta, - Map, String> skewedColValueLocationMaps, - Path newPartPath, SkewedInfo skewedInfo) { - Path lbdPath = fSta.getPath().getParent(); - List skewedValue = new ArrayList(); - String lbDirName = FileUtils.unescapePathName(lbdPath.toString()); - String partDirName = FileUtils.unescapePathName(newPartPath.toString()); - String lbDirSuffix = lbDirName.replace(partDirName, ""); - String[] dirNames = lbDirSuffix.split(Path.SEPARATOR); - for (String dirName : dirNames) { - if ((dirName != null) && (dirName.length() > 0)) { - // Construct skewed-value to location map except default directory. - // why? query logic knows default-dir structure and don't need to get from map - if (!dirName - .equalsIgnoreCase(ListBucketingPrunerUtils.HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME)) { - String[] kv = dirName.split("="); - if (kv.length == 2) { - skewedValue.add(kv[1]); - } - } - } - } - if ((skewedValue.size() > 0) && (skewedValue.size() == skewedInfo.getSkewedColNames().size()) - && !skewedColValueLocationMaps.containsKey(skewedValue)) { - skewedColValueLocationMaps.put(skewedValue, lbdPath.toString()); - } -} - - /** - * Construct location map from path - * - * @param newPartPath - * @param skewedInfo - * @return - * @throws IOException - * @throws FileNotFoundException - */ - private Map, String> constructListBucketingLocationMap(Path newPartPath, - SkewedInfo skewedInfo) throws IOException, FileNotFoundException { - Map, String> skewedColValueLocationMaps = new HashMap, String>(); - FileSystem fSys = newPartPath.getFileSystem(conf); - walkDirTree(fSys.getFileStatus(newPartPath), fSys, skewedColValueLocationMaps, newPartPath, - skewedInfo); - return skewedColValueLocationMaps; - } - - - /** * Given a source directory name of the load path, load all dynamically generated partitions * into the specified table and return a list of strings that represent the dynamic partition * paths. @@ -1833,20 +1739,6 @@ public void loadTable(Path loadPath, String tableName, boolean replace, boolean //column stats will be inaccurate StatsSetupConst.clearColumnStatsState(tbl.getParameters()); - try { - if (isSkewedStoreAsSubdir) { - SkewedInfo skewedInfo = tbl.getSkewedInfo(); - // Construct list bucketing location mappings from sub-directory name. - Map, String> skewedColValueLocationMaps = constructListBucketingLocationMap( - tbl.getPath(), skewedInfo); - // Add list bucketing location mappings. - skewedInfo.setSkewedColValueLocationMaps(skewedColValueLocationMaps); - } - } catch (IOException e) { - LOG.error(StringUtils.stringifyException(e)); - throw new HiveException(e); - } - EnvironmentContext environmentContext = null; if (hasFollowingStatsTask) { environmentContext = new EnvironmentContext(); diff --git a/ql/src/test/queries/clientpositive/create_alter_list_bucketing_table1.q b/ql/src/test/queries/clientpositive/create_alter_list_bucketing_table1.q index bf89e8f..ea6a6ca 100644 --- a/ql/src/test/queries/clientpositive/create_alter_list_bucketing_table1.q +++ b/ql/src/test/queries/clientpositive/create_alter_list_bucketing_table1.q @@ -29,7 +29,14 @@ alter table stored_as_dirs_single SKEWED BY (key) ON ('1','5','6') stored as DIRECTORIES; describe formatted stored_as_dirs_single; --- 7. create table like +-- 7. alter skew location +insert into stored_as_dirs_single values('1', 'value1'), ('3', 'value3'); +describe formatted stored_as_dirs_single; +alter table stored_as_dirs_single set skewed location('1'='file:${system:test.tmp.dir}/stored_as_dirs_single/key=new1'); +insert into stored_as_dirs_single values('1', 'value1'), ('3', 'value3'); +describe formatted stored_as_dirs_single; + +-- 8. create table like create table stored_as_dirs_single_like like stored_as_dirs_single; describe formatted stored_as_dirs_single_like; diff --git a/ql/src/test/results/clientpositive/create_alter_list_bucketing_table1.q.out b/ql/src/test/results/clientpositive/create_alter_list_bucketing_table1.q.out index df4a75b..e31d734 100644 --- a/ql/src/test/results/clientpositive/create_alter_list_bucketing_table1.q.out +++ b/ql/src/test/results/clientpositive/create_alter_list_bucketing_table1.q.out @@ -302,12 +302,120 @@ Skewed Columns: [key] Skewed Values: [[1], [5], [6]] Storage Desc Params: serialization.format 1 -PREHOOK: query: -- 7. create table like +PREHOOK: query: -- 7. alter skew location +insert into stored_as_dirs_single values('1', 'value1'), ('3', 'value3') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@stored_as_dirs_single +POSTHOOK: query: -- 7. alter skew location +insert into stored_as_dirs_single values('1', 'value1'), ('3', 'value3') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@stored_as_dirs_single +POSTHOOK: Lineage: stored_as_dirs_single.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: stored_as_dirs_single.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: describe formatted stored_as_dirs_single +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stored_as_dirs_single +POSTHOOK: query: describe formatted stored_as_dirs_single +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stored_as_dirs_single +# col_name data_type comment + +key string +value string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + numFiles 2 + numRows 0 + rawDataSize 0 + totalSize 18 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key] +Skewed Values: [[1], [5], [6]] +Storage Desc Params: + serialization.format 1 +#### A masked pattern was here #### +PREHOOK: type: ALTERTBLPART_SKEWED_LOCATION +PREHOOK: Input: default@stored_as_dirs_single +PREHOOK: Output: default@stored_as_dirs_single +#### A masked pattern was here #### +POSTHOOK: type: ALTERTBLPART_SKEWED_LOCATION +POSTHOOK: Input: default@stored_as_dirs_single +POSTHOOK: Output: default@stored_as_dirs_single +#### A masked pattern was here #### +PREHOOK: query: insert into stored_as_dirs_single values('1', 'value1'), ('3', 'value3') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@stored_as_dirs_single +POSTHOOK: query: insert into stored_as_dirs_single values('1', 'value1'), ('3', 'value3') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@stored_as_dirs_single +POSTHOOK: Lineage: stored_as_dirs_single.key SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: stored_as_dirs_single.value SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: describe formatted stored_as_dirs_single +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stored_as_dirs_single +POSTHOOK: query: describe formatted stored_as_dirs_single +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stored_as_dirs_single +# col_name data_type comment + +key string +value string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + numFiles 3 + numRows 0 + rawDataSize 0 + totalSize 27 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key] +Skewed Values: [[1], [5], [6]] +#### A masked pattern was here #### +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- 8. create table like create table stored_as_dirs_single_like like stored_as_dirs_single PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@stored_as_dirs_single_like -POSTHOOK: query: -- 7. create table like +POSTHOOK: query: -- 8. create table like create table stored_as_dirs_single_like like stored_as_dirs_single POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default @@ -348,6 +456,7 @@ Sort Columns: [] Stored As SubDirectories: Yes Skewed Columns: [key] Skewed Values: [[1], [5], [6]] +#### A masked pattern was here #### Storage Desc Params: serialization.format 1 PREHOOK: query: -- cleanup