Index: build-common.xml
===================================================================
--- build-common.xml	(revision 965024)
+++ build-common.xml	(working copy)
@@ -407,6 +407,7 @@
   -->
+
Index: ql/src/test/results/clientpositive/sample10.q.out
===================================================================
--- ql/src/test/results/clientpositive/sample10.q.out	(revision 965024)
+++ ql/src/test/results/clientpositive/sample10.q.out	(working copy)
@@ -102,14 +102,14 @@
               type: bigint
       Needs Tagging: false
       Path -> Alias:
-        file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=11/attempt_local_0001_r_000000_0 [srcpartbucket]
-        file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=12/attempt_local_0001_r_000000_0 [srcpartbucket]
-        file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=11/attempt_local_0001_r_000000_0 [srcpartbucket]
-        file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=12/attempt_local_0001_r_000000_0 [srcpartbucket]
+        file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=11/000000_0 [srcpartbucket]
+        file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=12/000000_0 [srcpartbucket]
+        file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=11/000000_0 [srcpartbucket]
+        file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=12/000000_0 [srcpartbucket]
       Path -> Partition:
-        file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=11/attempt_local_0001_r_000000_0
+        file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=11/000000_0
           Partition
-            base file name: attempt_local_0001_r_000000_0
+            base file name: 000000_0
             input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
             output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
             partition values:
@@ -122,13 +122,13 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-              location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+              location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
              name srcpartbucket
              partition_columns ds/hr
              serialization.ddl struct srcpartbucket { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              transient_lastDdlTime 1277145923
+              transient_lastDdlTime 1279517872
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
 
             input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -140,19 +140,19 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-              location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+              location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
              name srcpartbucket
              partition_columns ds/hr
              serialization.ddl struct srcpartbucket { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              transient_lastDdlTime 1277145923
+              transient_lastDdlTime 1279517872
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
             name: srcpartbucket
           name: srcpartbucket
-        file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=12/attempt_local_0001_r_000000_0
+        file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=12/000000_0
           Partition
-            base file name: attempt_local_0001_r_000000_0
+            base file name: 000000_0
             input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
             output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
             partition values:
@@ -165,13 +165,13 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-              location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+              location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
              name srcpartbucket
              partition_columns ds/hr
              serialization.ddl struct srcpartbucket { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              transient_lastDdlTime 1277145923
+              transient_lastDdlTime 1279517872
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
 
             input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -183,19 +183,19 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-              location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+              location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
              name srcpartbucket
              partition_columns ds/hr
              serialization.ddl struct srcpartbucket { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              transient_lastDdlTime 1277145923
+              transient_lastDdlTime 1279517872
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
             name: srcpartbucket
           name: srcpartbucket
-        file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=11/attempt_local_0001_r_000000_0
+        file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=11/000000_0
           Partition
-            base file name: attempt_local_0001_r_000000_0
+            base file name: 000000_0
             input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
             output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
             partition values:
@@ -208,13 +208,13 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-              location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+              location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
              name srcpartbucket
              partition_columns ds/hr
              serialization.ddl struct srcpartbucket { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              transient_lastDdlTime 1277145923
+              transient_lastDdlTime 1279517872
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
 
             input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -226,19 +226,19 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-              location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+              location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
              name srcpartbucket
              partition_columns ds/hr
              serialization.ddl struct srcpartbucket { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              transient_lastDdlTime 1277145923
+              transient_lastDdlTime 1279517872
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
             name: srcpartbucket
           name: srcpartbucket
-        file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=12/attempt_local_0001_r_000000_0
+        file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=12/000000_0
           Partition
-            base file name: attempt_local_0001_r_000000_0
+            base file name: 000000_0
             input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
             output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
             partition values:
@@ -251,13 +251,13 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-              location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+              location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
              name srcpartbucket
              partition_columns ds/hr
              serialization.ddl struct srcpartbucket { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              transient_lastDdlTime 1277145923
+              transient_lastDdlTime 1279517872
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
 
             input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -269,13 +269,13 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-              location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+              location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
              name srcpartbucket
              partition_columns ds/hr
              serialization.ddl struct srcpartbucket { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              transient_lastDdlTime 1277145923
+              transient_lastDdlTime 1279517872
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
             name: srcpartbucket
           name: srcpartbucket
@@ -299,7 +299,7 @@
           File Output Operator
             compressed: false
             GlobalTableId: 0
-            directory: file:/data/users/nzhang/work/999/apache-hive/build/ql/scratchdir/hive_2010-06-21_11-45-29_813_4438091765019255104/10001
+            directory: file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/scratchdir/hive_2010-07-18_22-37-59_060_3045357226899710132/10001
             NumFilesPerFileSink: 1
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -322,14 +322,14 @@
 PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
 PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-06-21_11-45-30_185_5515955290012905688/10000
+PREHOOK: Output: file:/tmp/jssarma/hive_2010-07-18_22-37-59_639_2719302052990534208/10000
 POSTHOOK: query: select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 4 on key) where ds is not null group by ds
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-06-21_11-45-30_185_5515955290012905688/10000
+POSTHOOK: Output: file:/tmp/jssarma/hive_2010-07-18_22-37-59_639_2719302052990534208/10000
 POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
 POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
@@ -346,14 +346,14 @@
 PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
 PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-06-21_11-45-35_252_5650802559039809329/10000
+PREHOOK: Output: file:/tmp/jssarma/hive_2010-07-18_22-38-03_708_4461107599597555917/10000
 POSTHOOK: query: select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 2 on key) where ds is not null group by ds
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-06-21_11-45-35_252_5650802559039809329/10000
+POSTHOOK: Output: file:/tmp/jssarma/hive_2010-07-18_22-38-03_708_4461107599597555917/10000
 POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
 POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
@@ -370,14 +370,14 @@
 PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
 PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-06-21_11-45-40_176_6924299231993417982/10000
+PREHOOK: Output: file:/tmp/jssarma/hive_2010-07-18_22-38-07_920_2071428667805216792/10000
 POSTHOOK: query: select * from srcpartbucket where ds is not null
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-06-21_11-45-40_176_6924299231993417982/10000
+POSTHOOK: Output: file:/tmp/jssarma/hive_2010-07-18_22-38-07_920_2071428667805216792/10000
 POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
 POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
Index: ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
===================================================================
--- ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java	(revision 965024)
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java	(working copy)
@@ -20,7 +20,6 @@
 
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.FileOutputStream;
 import java.util.ArrayList;
 import java.util.LinkedList;
 
@@ -50,6 +49,7 @@
 import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
 import org.apache.hadoop.hive.ql.plan.ScriptDesc;
 import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.mapred.TextInputFormat;
@@ -433,34 +433,14 @@
     mr.setReducer(op5);
   }
 
-  private File generatePlanFile() throws Exception {
-    File scratchDir = new File((new HiveConf(TestExecDriver.class))
-        .getVar(ConfVars.SCRATCHDIR));
-    File planFile = File.createTempFile("plan", ".xml", scratchDir);
-    System.out.println("Generating plan file " + planFile.toString());
-    FileOutputStream out = new FileOutputStream(planFile);
-    Utilities.serializeMapRedWork(mr, out);
-    return planFile;
-  }
-
-  private void executePlan(File planFile) throws Exception {
+  private void executePlan() throws Exception {
     String testName = new Exception().getStackTrace()[1].getMethodName();
-    String cmdLine = conf.getVar(HiveConf.ConfVars.HADOOPBIN) + " jar "
-        + conf.getJar() + " org.apache.hadoop.hive.ql.exec.ExecDriver -plan "
-        + planFile.toString() + " " + ExecDriver.generateCmdLine(conf);
-    System.out.println("Executing: " + cmdLine);
-    Process executor = Runtime.getRuntime().exec(cmdLine);
+    MapRedTask mrtask = new MapRedTask();
+    DriverContext dctx = new DriverContext ();
+    mrtask.setWork(mr);
+    mrtask.initialize(conf, null, dctx);
+    int exitVal = mrtask.execute(dctx);
 
-    StreamPrinter outPrinter = new StreamPrinter(executor.getInputStream(),
-        null, System.out);
-    StreamPrinter errPrinter = new StreamPrinter(executor.getErrorStream(),
-        null, System.err);
-
-    outPrinter.start();
-    errPrinter.start();
-
-    int exitVal = executor.waitFor();
-
     if (exitVal != 0) {
       System.out.println(testName + " execution failed with exit status: "
           + exitVal);
@@ -475,8 +455,7 @@
     try {
       populateMapPlan1(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "src"));
-      File planFile = generatePlanFile();
-      executePlan(planFile);
+      executePlan();
       fileDiff("lt100.txt.deflate", "mapplan1.out");
     } catch (Throwable e) {
       e.printStackTrace();
@@ -490,8 +469,7 @@
     try {
       populateMapPlan2(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "src"));
-      File planFile = generatePlanFile();
-      executePlan(planFile);
+      executePlan();
      fileDiff("lt100.txt", "mapplan2.out");
     } catch (Throwable e) {
       e.printStackTrace();
@@ -506,8 +484,7 @@
     try {
       populateMapRedPlan1(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "src"));
-      File planFile = generatePlanFile();
-      executePlan(planFile);
+      executePlan();
       fileDiff("kv1.val.sorted.txt", "mapredplan1.out");
     } catch (Throwable e) {
       e.printStackTrace();
@@ -522,8 +499,7 @@
     try {
       populateMapRedPlan2(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "src"));
-      File planFile = generatePlanFile();
-      executePlan(planFile);
+      executePlan();
       fileDiff("lt100.sorted.txt", "mapredplan2.out");
     } catch (Throwable e) {
       e.printStackTrace();
@@ -538,8 +514,7 @@
     try {
       populateMapRedPlan3(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "src"),
          db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "src2"));
-      File planFile = generatePlanFile();
-      executePlan(planFile);
+      executePlan();
       fileDiff("kv1kv2.cogroup.txt", "mapredplan3.out");
     } catch (Throwable e) {
       e.printStackTrace();
@@ -554,8 +529,7 @@
     try {
       populateMapRedPlan4(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "src"));
-      File planFile = generatePlanFile();
-      executePlan(planFile);
+      executePlan();
       fileDiff("kv1.string-sorted.txt", "mapredplan4.out");
     } catch (Throwable e) {
       e.printStackTrace();
@@ -570,8 +544,7 @@
     try {
       populateMapRedPlan5(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "src"));
-      File planFile = generatePlanFile();
-      executePlan(planFile);
+      executePlan();
       fileDiff("kv1.string-sorted.txt", "mapredplan5.out");
     } catch (Throwable e) {
       e.printStackTrace();
@@ -586,8 +559,7 @@
     try {
       populateMapRedPlan6(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "src"));
-      File planFile = generatePlanFile();
-      executePlan(planFile);
+      executePlan();
       fileDiff("lt100.sorted.txt", "mapredplan6.out");
     } catch (Throwable e) {
       e.printStackTrace();
Index: ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
===================================================================
--- ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java	(revision 965024)
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java	(working copy)
@@ -26,6 +26,7 @@
 import junit.framework.TestCase;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
 import org.apache.hadoop.hive.ql.plan.CollectDesc;
@@ -137,48 +138,47 @@
     }
   }
 
+  private void testTaskIds(String [] taskIds, String expectedAttemptId, String expectedTaskId) {
+    Configuration conf = new JobConf(TestOperators.class);
+    for (String one: taskIds) {
+      conf.set("mapred.task.id", one);
+      String attemptId = Utilities.getTaskId(conf);
+      assertEquals(expectedAttemptId, attemptId);
+      assertEquals(Utilities.getTaskIdFromFilename(attemptId), expectedTaskId);
+      assertEquals(Utilities.getTaskIdFromFilename(attemptId + ".gz"), expectedTaskId);
+      assertEquals(Utilities.getTaskIdFromFilename
+                   (Utilities.toTempPath(new Path(attemptId + ".gz")).toString()), expectedTaskId);
+    }
+  }
+
+  /**
+   * More stuff needs to be added here. Currently it only checks some basic
+   * file naming libraries
+   * The old test was deactivated as part of hive-405
+   */
   public void testFileSinkOperator() throws Throwable {
+
     try {
-      System.out.println("Testing FileSink Operator");
-      // col1
-      ExprNodeDesc exprDesc1 = TestExecDriver.getStringColumn("col1");
+      testTaskIds (new String [] {
+          "attempt_200707121733_0003_m_000005_0",
+          "attempt_local_0001_m_000005_0",
+          "task_200709221812_0001_m_000005_0",
+          "task_local_0001_m_000005_0"
+        }, "000005_0", "000005");
 
-      // col2
-      ExprNodeDesc expr1 = TestExecDriver.getStringColumn("col0");
-      ExprNodeDesc expr2 = new ExprNodeConstantDesc("1");
-      ExprNodeDesc exprDesc2 = TypeCheckProcFactory.DefaultExprProcessor
-          .getFuncExprNodeDesc("concat", expr1, expr2);
+      testTaskIds (new String [] {
+          "job_local_0001_map_000005",
+          "job_local_0001_reduce_000005",
+        }, "000005", "000005");
 
-      // select operator to project these two columns
-      ArrayList<ExprNodeDesc> earr = new ArrayList<ExprNodeDesc>();
-      earr.add(exprDesc1);
-      earr.add(exprDesc2);
-      ArrayList<String> outputCols = new ArrayList<String>();
-      for (int i = 0; i < earr.size(); i++) {
-        outputCols.add("_col" + i);
-      }
-      SelectDesc selectCtx = new SelectDesc(earr, outputCols);
-      Operator<SelectDesc> op = OperatorFactory.get(SelectDesc.class);
-      op.setConf(selectCtx);
+      testTaskIds (new String [] {"1234567"},
+                   "1234567", "1234567");
 
-      // fileSinkOperator to dump the output of the select
-      // fileSinkDesc fsd = new fileSinkDesc ("file:///tmp" + File.separator +
-      // System.getProperty("user.name") + File.separator +
-      // "TestFileSinkOperator",
-      // Utilities.defaultTd, false);
-      // Operator flop = OperatorFactory.getAndMakeChild(fsd, op);
+      assertEquals(Utilities.getTaskIdFromFilename
+                   ("/mnt/dev005/task_local_0001_m_000005_0"),
+                   "000005");
 
-      op.initialize(new JobConf(TestOperators.class),
-          new ObjectInspector[] {r[0].oi});
-
-      // evaluate on row
-      for (int i = 0; i < 5; i++) {
-        op.process(r[i].o, 0);
-      }
-      op.close(false);
-      System.out.println("FileSink Operator ok");
-
     } catch (Throwable e) {
       e.printStackTrace();
       throw e;
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java	(revision 965024)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java	(working copy)
@@ -278,8 +278,8 @@
 
     if(!HiveConf.getVar(job, HiveConf.ConfVars.HADOOPJT).equals("local")) {
       // use the default file system of the job
-      FileSystem fs = FileSystem.get(job);
       Path planPath = new Path(hiveScratchDir, "plan." + randGen.nextInt());
+      FileSystem fs = planPath.getFileSystem(job);
       FSDataOutputStream out = fs.create(planPath);
       serializeMapRedWork(w, out);
       HiveConf.setVar(job, HiveConf.ConfVars.PLAN, planPath.toString());
@@ -467,9 +467,16 @@
   public static String getTaskId(Configuration hconf) {
     String taskid = (hconf == null) ? null : hconf.get("mapred.task.id");
     if ((taskid == null) || taskid.equals("")) {
-      return ("" + randGen.nextInt());
+      return ("" + Math.abs(randGen.nextInt()));
     } else {
-      return taskid.replaceAll("task_[0-9]+_", "");
+      /* extract the task and attempt id from the hadoop taskid.
+         in version 17 the leading component was 'task_'. thereafter
+         the leading component is 'attempt_'. in 17 - hadoop also
+         seems to have used _map_ and _reduce_ to denote map/reduce
+         task types
+      */
+      String ret = taskid.replaceAll(".*_[mr]_", "").replaceAll(".*_(map|reduce)_", "");
+      return (ret);
     }
   }
 
@@ -958,31 +965,28 @@
 
   /**
    * The first group will contain the task id. The second group is the optional
-   * extension. The file name looks like: "24931_r_000000_0" or
-   * "24931_r_000000_0.gz"
+   * extension. The file name looks like: "0_0" or "0_0.gz". There may be a leading
+   * prefix (tmp_). Since getTaskId() can return an integer only - this should match
+   * a pure integer as well
    */
-  private static Pattern fileNameTaskIdRegex = Pattern.compile("^.*_([0-9]*)_[0-9](\\..*)?$");
+  private static Pattern fileNameTaskIdRegex = Pattern.compile("^.*?([0-9]+)(_[0-9])?(\\..*)?$");
 
   /**
-   * Local job name looks like "job_local_1_map_0000", where 1 is job ID and 0000 is task ID.
+   * Get the task id from the filename.
+   * It is assumed that the filename is derived from the output of getTaskId
+   *
+   * @param filename filename to extract taskid from
    */
-  private static Pattern fileNameLocalTaskIdRegex = Pattern.compile(".*local.*_([0-9]*)$");
-
-  /**
-   * Get the task id from the filename. E.g., get "000000" out of
-   * "24931_r_000000_0" or "24931_r_000000_0.gz"
-   */
   public static String getTaskIdFromFilename(String filename) {
     String taskId = filename;
-    Matcher m = fileNameTaskIdRegex.matcher(filename);
+    int dirEnd = filename.lastIndexOf(Path.SEPARATOR);
+    if (dirEnd != -1)
+      taskId = filename.substring(dirEnd + 1);
+
+    Matcher m = fileNameTaskIdRegex.matcher(taskId);
     if (!m.matches()) {
-      Matcher m2 = fileNameLocalTaskIdRegex.matcher(filename);
-      if (!m2.matches()) {
-        LOG.warn("Unable to get task id from file name: " + filename
-            + ". Using full filename as task id.");
-      } else {
-        taskId = m2.group(1);
-      }
+      LOG.warn("Unable to get task id from file name: " + filename
+          + ". Using last component" + taskId + " as task id.");
     } else {
       taskId = m.group(1);
     }
@@ -991,17 +995,21 @@
   }
 
   /**
-   * Replace the task id from the filename. E.g., replace "000000" out of
-   * "24931_r_000000_0" or "24931_r_000000_0.gz" by 33 to
-   * "24931_r_000033_0" or "24931_r_000033_0.gz"
+   * Replace the task id from the filename.
+   * It is assumed that the filename is derived from the output of getTaskId
+   *
+   * @param filename filename to replace taskid
+   *        "0_0" or "0_0.gz" by 33 to
+   *        "33_0" or "33_0.gz"
    */
   public static String replaceTaskIdFromFilename(String filename, int bucketNum) {
     String taskId = getTaskIdFromFilename(filename);
     String newTaskId = replaceTaskId(taskId, bucketNum);
-    return replaceTaskIdFromFilename(filename, taskId, newTaskId);
+    String ret = replaceTaskIdFromFilename(filename, taskId, newTaskId);
+    return (ret);
   }
 
-  public static String replaceTaskId(String taskId, int bucketNum) {
+  private static String replaceTaskId(String taskId, int bucketNum) {
     String strBucketNum = String.valueOf(bucketNum);
     int bucketNumLen = strBucketNum.length();
     int taskIdLen = taskId.length();
@@ -1020,7 +1028,7 @@
    * @param newTaskId
    * @return
    */
-  public static String replaceTaskIdFromFilename(String filename,
+  private static String replaceTaskIdFromFilename(String filename,
       String oldTaskId, String newTaskId) {
     String[] spl = filename.split(oldTaskId);