diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 3fab298..4f7f59a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -2948,7 +2948,6 @@ public static double getHighestSamplePercentage (MapWork work) {
    */
   public static List<Path> getInputPaths(JobConf job, MapWork work, Path hiveScratchDir,
       Context ctx, boolean skipDummy) throws Exception {
-    int sequenceNumber = 0;
 
     Set<Path> pathsProcessed = new HashSet<Path>();
     List<Path> pathsToAdd = new LinkedList<Path>();
@@ -2975,7 +2974,7 @@ public static double getHighestSamplePercentage (MapWork work) {
 
         if (!skipDummy && isEmptyPath(job, path, ctx)) {
           path = createDummyFileForEmptyPartition(path, job, work,
-              hiveScratchDir, alias, sequenceNumber++);
+              hiveScratchDir, alias);
         }
 
         pathsToAdd.add(path);
@@ -2991,8 +2990,7 @@ public static double getHighestSamplePercentage (MapWork work) {
       // If T is empty and T2 contains 100 rows, the user expects: 0, 100 (2
       // rows)
       if (path == null && !skipDummy) {
-        path = createDummyFileForEmptyTable(job, work, hiveScratchDir,
-            alias, sequenceNumber++);
+        path = createDummyFileForEmptyTable(job, work, hiveScratchDir, alias);
         pathsToAdd.add(path);
       }
     }
@@ -3002,11 +3000,11 @@ public static double getHighestSamplePercentage (MapWork work) {
   @SuppressWarnings({"rawtypes", "unchecked"})
   private static Path createEmptyFile(Path hiveScratchDir,
       HiveOutputFormat outFileFormat, JobConf job,
-      int sequenceNumber, Properties props, boolean dummyRow)
+      String alias, Properties props, boolean dummyRow)
           throws IOException, InstantiationException, IllegalAccessException {
 
     // create a dummy empty file in a new directory
-    String newDir = hiveScratchDir + Path.SEPARATOR + sequenceNumber;
+    String newDir = hiveScratchDir + Path.SEPARATOR + alias;
     Path newPath = new Path(newDir);
     FileSystem fs = newPath.getFileSystem(job);
     fs.mkdirs(newPath);
@@ -3032,7 +3030,7 @@ private static Path createEmptyFile(Path hiveScratchDir,
 
   @SuppressWarnings("rawtypes")
   private static Path createDummyFileForEmptyPartition(Path path, JobConf job, MapWork work,
-      Path hiveScratchDir, String alias, int sequenceNumber)
+      Path hiveScratchDir, String alias)
           throws Exception {
 
     String strPath = path.toString();
@@ -3051,7 +3049,7 @@ private static Path createDummyFileForEmptyPartition(Path path, JobConf job, Map
     boolean oneRow = partDesc.getInputFileFormatClass() == OneNullRowInputFormat.class;
 
     Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job,
-        sequenceNumber, props, oneRow);
+        alias, props, oneRow);
 
     if (LOG.isInfoEnabled()) {
       LOG.info("Changed input file " + strPath + " to empty file " + newPath);
@@ -3076,7 +3074,7 @@ private static Path createDummyFileForEmptyPartition(Path path, JobConf job, Map
 
   @SuppressWarnings("rawtypes")
   private static Path createDummyFileForEmptyTable(JobConf job, MapWork work,
-      Path hiveScratchDir, String alias, int sequenceNumber)
+      Path hiveScratchDir, String alias)
           throws Exception {
 
     TableDesc tableDesc = work.getAliasToPartnInfo().get(alias).getTableDesc();
@@ -3089,7 +3087,7 @@ private static Path createDummyFileForEmptyTable(JobConf job, MapWork work,
     HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, tableDesc);
 
     Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job,
-        sequenceNumber, props, false);
+        alias, props, false);
 
     if (LOG.isInfoEnabled()) {
       LOG.info("Changed input file for alias " + alias + " to " + newPath);
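[Editor's note -- illustration, not part of the patch] Before this change, getInputPaths() named each dummy-file directory after a counter that restarted at zero on every invocation, so two invocations sharing one scratch directory (exactly what the new test below sets up) would both pick <hiveScratchDir>/0 and collide. Naming the directory after the table alias keeps the paths distinct whenever the aliases differ. A minimal, self-contained Java sketch of the two naming schemes (the /tmp/scratch prefix is hypothetical):

    import java.util.HashSet;
    import java.util.Set;

    public class DummyPathNaming {
      public static void main(String[] args) {
        Set<String> created = new HashSet<>();

        // Old scheme: the counter restarts at 0 in each getInputPaths() call,
        // so two calls over the same scratch dir reuse the same directory name.
        for (String alias : new String[] {"alias1", "alias2"}) {
          int sequenceNumber = 0; // reset on every call
          String dir = "/tmp/scratch/" + sequenceNumber;
          System.out.println(alias + " -> " + dir + " (fresh: " + created.add(dir) + ")");
        }

        created.clear();

        // New scheme: the alias itself names the directory, so distinct
        // aliases can never race for the same path.
        for (String alias : new String[] {"alias1", "alias2"}) {
          String dir = "/tmp/scratch/" + alias;
          System.out.println(alias + " -> " + dir + " (fresh: " + created.add(dir) + ")");
        }
      }
    }

The first loop prints "fresh: true" then "fresh: false" (a collision); the second prints "fresh: true" twice.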
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
index cc59f13..943df49 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
@@ -25,19 +25,35 @@
 import java.sql.Timestamp;
 import java.util.ArrayList;
 import java.util.HashSet;
+import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Properties;
+import java.util.UUID;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
 
 import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
+import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFFromUtcTimestamp;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.mapred.JobConf;
+
+import org.mockito.Mockito;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -161,4 +177,60 @@ public void testMaskIfPassword() {
     Assert.assertEquals("###_MASKED_###",Utilities.maskIfPassword("password_a","test5"));
     Assert.assertEquals("###_MASKED_###",Utilities.maskIfPassword("a_PassWord_a","test6"));
   }
+
+  /**
+   * Check that calling {@link Utilities#getInputPaths(JobConf, MapWork, Path, Context, boolean)}
+   * can process two different empty tables without throwing any exceptions.
+   */
+  public void testGetInputPathsWithEmptyTables() throws Exception {
+    String alias1Name = "alias1";
+    String alias2Name = "alias2";
+
+    MapWork mapWork1 = new MapWork();
+    MapWork mapWork2 = new MapWork();
+    JobConf jobConf = new JobConf();
+
+    String nonExistentPath1 = UUID.randomUUID().toString();
+    String nonExistentPath2 = UUID.randomUUID().toString();
+
+    PartitionDesc mockPartitionDesc = Mockito.mock(PartitionDesc.class);
+    TableDesc mockTableDesc = Mockito.mock(TableDesc.class);
+
+    Mockito.when(mockTableDesc.isNonNative()).thenReturn(false);
+    Mockito.when(mockTableDesc.getProperties()).thenReturn(new Properties());
+
+    Mockito.when(mockPartitionDesc.getProperties()).thenReturn(new Properties());
+    Mockito.when(mockPartitionDesc.getTableDesc()).thenReturn(mockTableDesc);
+    Mockito.doReturn(HiveSequenceFileOutputFormat.class).when(
+        mockPartitionDesc).getOutputFileFormatClass();
+
+    mapWork1.setPathToAliases(new LinkedHashMap<>(
+        ImmutableMap.of(nonExistentPath1, Lists.newArrayList(alias1Name))));
+    mapWork1.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(
+        ImmutableMap.of(alias1Name, (Operator) Mockito.mock(Operator.class))));
+    mapWork1.setPathToPartitionInfo(new LinkedHashMap<>(
+        ImmutableMap.of(nonExistentPath1, mockPartitionDesc)));
+
+    mapWork2.setPathToAliases(new LinkedHashMap<>(
+        ImmutableMap.of(nonExistentPath2, Lists.newArrayList(alias2Name))));
+    mapWork2.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(
+        ImmutableMap.of(alias2Name, (Operator) Mockito.mock(Operator.class))));
+    mapWork2.setPathToPartitionInfo(new LinkedHashMap<>(
+        ImmutableMap.of(nonExistentPath2, mockPartitionDesc)));
+
+    List<Path> inputPaths = new ArrayList<>();
+    try {
+      Path scratchDir = new Path(HiveConf.getVar(jobConf, HiveConf.ConfVars.LOCALSCRATCHDIR));
+      inputPaths.addAll(Utilities.getInputPaths(jobConf, mapWork1, scratchDir,
+          Mockito.mock(Context.class), false));
+      inputPaths.addAll(Utilities.getInputPaths(jobConf, mapWork2, scratchDir,
+          Mockito.mock(Context.class), false));
+      assertEquals(2, inputPaths.size());
+    } finally {
+      for (Path path : inputPaths) {
+        // Dummy inputs are created as directories; delete recursively.
+        FileUtils.deleteQuietly(new File(path.toString()));
+      }
+    }
+  }
 }
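[Editor's note -- illustration, not part of the patch] On the cleanup in the finally block above: the patched createEmptyFile() creates each dummy input as a new directory with an empty file inside it, and java.io.File.delete() silently returns false on a non-empty directory. commons-io's FileUtils.deleteQuietly() (already imported by this test class) removes a file or a whole directory tree and never throws, which makes it the safer cleanup. A short stand-alone demonstration (the directory and file names here are arbitrary):

    import java.io.File;
    import java.io.IOException;

    import org.apache.commons.io.FileUtils;

    public class DeleteDirDemo {
      public static void main(String[] args) throws IOException {
        File dir = new File(System.getProperty("java.io.tmpdir"), "delete-demo");
        if (!dir.mkdirs() && !dir.isDirectory()) {
          throw new IOException("could not create " + dir);
        }
        new File(dir, "emptyFile").createNewFile();

        // File.delete() refuses to remove a non-empty directory.
        System.out.println("File.delete():  " + dir.delete());                 // false

        // deleteQuietly() removes the whole tree and swallows any errors.
        System.out.println("deleteQuietly:  " + FileUtils.deleteQuietly(dir)); // true
      }
    }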