Index: conf/hive-default.xml
===================================================================
--- conf/hive-default.xml	(revision 6444)
+++ conf/hive-default.xml	(working copy)
@@ -474,6 +474,12 @@
+<property>
+  <name>hive.exec.counters.pull.interval</name>
+  <value>1000</value>
+  <description>The interval with which to poll the JobTracker for the counters of the running job. The smaller it is, the more load there will be on the JobTracker; the higher it is, the less granular the collected counter data will be.</description>
+</property>
+
 <property>
   <name>hive.enforce.bucketing</name>
   <value>false</value>
   <description>Whether bucketing is enforced. If true, while inserting into the table, bucketing is enforced.</description>
 </property>
Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(revision 6444)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(working copy)
@@ -84,6 +84,7 @@
     EXECPARALLEL("hive.exec.parallel", false), // parallel query launching
     EXECPARALLETHREADNUMBER("hive.exec.parallel.thread.number", 8),
     HIVESPECULATIVEEXECREDUCERS("hive.mapred.reduce.tasks.speculative.execution", true),
+    HIVECOUNTERSPULLINTERVAL("hive.exec.counters.pull.interval", 1000L),
     DYNAMICPARTITIONING("hive.exec.dynamic.partition", false),
     DYNAMICPARTITIONINGMODE("hive.exec.dynamic.partition.mode", "strict"),
     DYNAMICPARTITIONMAXPARTS("hive.exec.max.dynamic.partitions", 1000),
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java	(revision 6444)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java	(working copy)
@@ -73,6 +73,7 @@
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobStatus;
 import org.apache.hadoop.mapred.Partitioner;
 import org.apache.hadoop.mapred.RunningJob;
 import org.apache.hadoop.mapred.TaskCompletionEvent;
@@ -315,11 +316,23 @@
     long maxReportInterval = 60 * 1000; // One minute
     boolean fatal = false;
     StringBuilder errMsg = new StringBuilder();
+    long pullInterval = HiveConf.getLongVar(job,
+        HiveConf.ConfVars.HIVECOUNTERSPULLINTERVAL);
+    boolean initializing = true;
     while (!rj.isComplete()) {
       try {
-        Thread.sleep(1000);
+        Thread.sleep(pullInterval);
       } catch (InterruptedException e) {
       }
+      if (initializing &&
+          rj.getJobState() == JobStatus.PREP) {
+        // No reason to poll until the job is initialized
+        continue;
+      } else {
+        // By now the job is initialized, so there is no reason to call
+        // rj.getJobState() again and we do not want to make an extra RPC call
+        initializing = false;
+      }
       th.setRunningJob(jc.getJob(rj.getJobID()));
       // If fatal errors happen we should kill the job immediately rather than
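
Usage note (not part of the patch): once this change is applied, the new hive.exec.counters.pull.interval setting can be overridden like any other entry from hive-default.xml, e.g. in hive-site.xml; it should also be adjustable per session with the CLI's set command. The sketch below uses an arbitrary example value of 5000 ms, trading counter granularity for less load on the JobTracker.

<!-- Example override in hive-site.xml (assumed usage; 5000 is an illustrative
     value, the patched default remains 1000 ms) -->
<property>
  <name>hive.exec.counters.pull.interval</name>
  <value>5000</value>
</property>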