Uploaded image for project: 'Hive'
  1. Hive
  2. HIVE-10151

insert into A select from B is broken when both A and B are Acid tables and bucketed the same way

    Details

      Description

      BucketingSortingReduceSinkOptimizer makes
      insert into AcidTable select * from otherAcidTable
      use BucketizedHiveInputFormat, which bypasses the ORC merge logic on read and tries to send individual bucket files (rather than the table directory) down to OrcInputFormat.
      (This is true only if both AcidTable and otherAcidTable are bucketed the same way.) ORC then fails with the IllegalArgumentException shown in the stack trace below.

      More specifically:

      create table acidTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
      create table acidTblPart(a int, b int) partitioned by (p string) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
      insert into acidTblPart partition(p=1) (a,b) values(1,2)
      insert into acidTbl(a,b) select a,b from acidTblPart where p = 1
      

      results in

      2015-04-29 13:57:35,807 ERROR [main]: exec.Task (SessionState.java:printError(956)) - Job Submission failed with exception 'java.lang.RuntimeException(serious problem)'
      java.lang.RuntimeException: serious problem
              at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.generateSplitsInfo(OrcInputFormat.java:1021)
              at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.getSplits(OrcInputFormat.java:1048)
              at org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat.getSplits(BucketizedHiveInputFormat.java:141)
              at org.apache.hadoop.mapreduce.JobSubmitter.writeOldSplits(JobSubmitter.java:624)
              at org.apache.hadoop.mapreduce.JobSubmitter.writeSplits(JobSubmitter.java:616)
              at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:492)
              at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1296)
              at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1293)
              at java.security.AccessController.doPrivileged(Native Method)
              at javax.security.auth.Subject.doAs(Subject.java:415)
              at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
              at org.apache.hadoop.mapreduce.Job.submit(Job.java:1293)
              at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:562)
              at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:557)
              at java.security.AccessController.doPrivileged(Native Method)
              at javax.security.auth.Subject.doAs(Subject.java:415)
              at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
              at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:557)
              at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:548)
              at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.execute(ExecDriver.java:430)
              at org.apache.hadoop.hive.ql.exec.mr.MapRedTask.execute(MapRedTask.java:137)
              at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:160)
              at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:88)
              at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1650)
              at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1409)
              at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1192)
              at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1059)
              at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1049)
              at org.apache.hadoop.hive.ql.TestTxnCommands2.runStatementOnDriver(TestTxnCommands2.java:225)
              at org.apache.hadoop.hive.ql.TestTxnCommands2.testDeleteIn2(TestTxnCommands2.java:148)
              at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
              at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
              at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
              at java.lang.reflect.Method.invoke(Method.java:606)
              at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47)
              at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
              at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44)
              at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
              at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
              at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
              at org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55)
              at org.junit.rules.RunRules.evaluate(RunRules.java:20)
              at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:271)
              at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:70)
              at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:50)
              at org.junit.runners.ParentRunner$3.run(ParentRunner.java:238)
              at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:63)
              at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:236)
              at org.junit.runners.ParentRunner.access$000(ParentRunner.java:53)
              at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:229)
              at org.junit.runners.ParentRunner.run(ParentRunner.java:309)
              at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:254)
              at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:149)
              at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:124)
              at org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:200)
              at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:153)
              at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:103)
      Caused by: java.util.concurrent.ExecutionException: java.lang.IllegalArgumentException: delta_0000001_0000001 does not start with base_
              at java.util.concurrent.FutureTask.report(FutureTask.java:122)
              at java.util.concurrent.FutureTask.get(FutureTask.java:188)
              at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.generateSplitsInfo(OrcInputFormat.java:998)
              ... 56 more
      Caused by: java.lang.IllegalArgumentException: delta_0000001_0000001 does not start with base_
              at org.apache.hadoop.hive.ql.io.AcidUtils.parseBase(AcidUtils.java:144)
              at org.apache.hadoop.hive.ql.io.AcidUtils.parseBaseBucketFilename(AcidUtils.java:172)
              at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$FileGenerator.call(OrcInputFormat.java:655)
              at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$FileGenerator.call(OrcInputFormat.java:620)
              at java.util.concurrent.FutureTask.run(FutureTask.java:262)
              at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
              at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
              at java.lang.Thread.run(Thread.java:745)
      
      2015-04-29 13:57:35,809 ERROR [main]: ql.Driver (SessionState.java:printError(956)) - FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask
      

        Attachments

        1. HIVE-10151.patch
          5 kB
          Eugene Koifman

          Issue Links

            Activity

              People

              • Assignee:
                ekoifman Eugene Koifman
                Reporter:
                ekoifman Eugene Koifman
              • Votes:
                0 Vote for this issue
                Watchers:
                8 Start watching this issue

                Dates

                • Created:
                  Updated:
                  Resolved: