Details
-
Bug
-
Status: Open
-
Major
-
Resolution: Unresolved
-
None
-
None
-
None
-
None
Description
I am not sure if partitions are needed in the source, or destination - it may still break with non-partitioned tables. Also I don't think it's specific to tablesample, I suspect it has something to do with the bucket file not being created.
Running the following on MiniTez
set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.exec.dynamic.partition.mode=nonstrict; set hive.fetch.task.conversion=none; set tez.grouping.min-size=1; set tez.grouping.max-size=2; set hive.tez.auto.reducer.parallelism=false; drop table intermediate; create table intermediate(key int) partitioned by (p int) stored as orc; insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 2; insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 2; create table bucket1_mm(key int, id int) partitioned by (key2 int) clustered by (key) sorted by (key) into 2 buckets; insert into table bucket1_mm partition (key2) select key + 1, key, key - 1 from intermediate union all select key - 1, key, key + 1 from intermediate; select * from bucket1_mm tablesample (bucket 2 out of 2) s;
Results in
2016-10-14T13:46:39,875 ERROR [96b8187f-c8a3-4075-94ae-cb866ce293c7 main] ql.Driver: FAILED: ArrayIndexOutOfBoundsException 1 java.lang.ArrayIndexOutOfBoundsException: 1 at org.apache.hadoop.hive.ql.metadata.Partition.getBucketPath(Partition.java:373) at org.apache.hadoop.hive.ql.optimizer.SamplePruner.prune(SamplePruner.java:199) at org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils.setMapWork(GenMapRedUtils.java:599) at org.apache.hadoop.hive.ql.parse.GenTezUtils.setupMapWork(GenTezUtils.java:210) at org.apache.hadoop.hive.ql.parse.GenTezUtils.createMapWork(GenTezUtils.java:189) at org.apache.hadoop.hive.ql.parse.GenTezWork.process(GenTezWork.java:128) at org.apache.hadoop.hive.ql.lib.CompositeProcessor.process(CompositeProcessor.java:41) at org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90) at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105) at org.apache.hadoop.hive.ql.parse.GenTezWorkWalker.walk(GenTezWorkWalker.java:90) at org.apache.hadoop.hive.ql.parse.GenTezWorkWalker.walk(GenTezWorkWalker.java:109) at org.apache.hadoop.hive.ql.parse.GenTezWorkWalker.walk(GenTezWorkWalker.java:109) at org.apache.hadoop.hive.ql.parse.GenTezWorkWalker.walk(GenTezWorkWalker.java:109) at org.apache.hadoop.hive.ql.parse.GenTezWorkWalker.startWalking(GenTezWorkWalker.java:72) at org.apache.hadoop.hive.ql.parse.TezCompiler.generateTaskTree(TezCompiler.java:382) at org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:270) at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:10928) at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:260) at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:251) at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:467) at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:342) at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1226) at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1346) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1143) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1131) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:233) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:184) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:400) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:336) at org.apache.hadoop.hive.ql.QTestUtil.executeClientInternal(QTestUtil.java:1319) at org.apache.hadoop.hive.ql.QTestUtil.executeClient(QTestUtil.java:1293) at org.apache.hadoop.hive.cli.control.CoreCliDriver.runTest(CoreCliDriver.java:173) at org.apache.hadoop.hive.cli.control.CliAdapter.runTest(CliAdapter.java:104) at org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver(TestMiniTezCliDriver.java:59) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:497) at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47) at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44) at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) at org.apache.hadoop.hive.cli.control.CliAdapter$2$1.evaluate(CliAdapter.java:92) at org.junit.rules.RunRules.evaluate(RunRules.java:20) at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:271) at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:70) at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:50) at org.junit.runners.ParentRunner$3.run(ParentRunner.java:238) at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:63) at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:236) at org.junit.runners.ParentRunner.access$000(ParentRunner.java:53) at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:229) at org.junit.runners.ParentRunner.run(ParentRunner.java:309) at org.junit.runners.Suite.runChild(Suite.java:127) at org.junit.runners.Suite.runChild(Suite.java:26) at org.junit.runners.ParentRunner$3.run(ParentRunner.java:238) at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:63) at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:236) at org.junit.runners.ParentRunner.access$000(ParentRunner.java:53) at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:229) at org.apache.hadoop.hive.cli.control.CliAdapter$1$1.evaluate(CliAdapter.java:73) at org.junit.rules.RunRules.evaluate(RunRules.java:20) at org.junit.runners.ParentRunner.run(ParentRunner.java:309) at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:367) at org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:274) at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238) at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:161) at org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:290) at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:242) at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:121)