From dca0b9042439f77331082f4b839df3ad93a9c0c3 Mon Sep 17 00:00:00 2001 From: Syed Albiz Date: Sat, 11 Jun 2011 20:59:40 -0700 Subject: [PATCH 1/1] HIVE-2100 fix by checking for operator initialization instead of excluding from OpCtxMap diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java index 96a1631..f09426f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java @@ -362,18 +362,16 @@ public class MapOperator extends Operator implements Serializable { for (String onealias : aliases) { Operator op = conf.getAliasToWork().get( - onealias); + onealias); LOG.info("Adding alias " + onealias + " to work list for file " - + onefile); + + onefile); MapInputPath inp = new MapInputPath(onefile, onealias, op); opCtxMap.put(inp, opCtx); if (operatorToPaths.get(op) == null) { operatorToPaths.put(op, new java.util.ArrayList()); } operatorToPaths.get(op).add(onefile); - - op - .setParentOperators(new ArrayList>()); + op.setParentOperators(new ArrayList>()); op.getParentOperators().add(this); // check for the operators who will process rows coming to this Map // Operator @@ -382,13 +380,12 @@ public class MapOperator extends Operator implements Serializable { childrenPaths.add(onefile); LOG.info("dump " + op.getName() + " " + opCtxMap.get(inp).getRowObjectInspector().getTypeName()); - if (!done) { - setInspectorInput(inp); - done = true; - } } + setInspectorInput(inp); + done = true; } } + if (children.size() == 0) { // didn't find match for input file path in configuration! // serious problem .. @@ -409,6 +406,8 @@ public class MapOperator extends Operator implements Serializable { // set that parent initialization is done and call initialize on children state = State.INIT; List> children = getChildOperators(); + Path fpath = new Path((new Path(HiveConf.getVar(hconf, + HiveConf.ConfVars.HADOOPMAPFILENAME))).toUri().getPath()); for (Entry entry : opCtxMap.entrySet()) { // Add alias, table name, and partitions to hadoop conf so that their diff --git ql/src/test/queries/clientpositive/nestedvirtual.q ql/src/test/queries/clientpositive/nestedvirtual.q new file mode 100644 index 0000000..7b80941 --- /dev/null +++ ql/src/test/queries/clientpositive/nestedvirtual.q @@ -0,0 +1,27 @@ +CREATE TABLE pokes (foo INT, bar STRING); +create table pokes2(foo INT, bar STRING); + +create table jssarma_nilzma_bad as select a.val, a.filename, a.offset from (select hash(foo) as val, INPUT__FILE__NAME as filename, BLOCK__OFFSET__INSIDE__FILE as offset from pokes) a join pokes2 b on (a.val = b.foo); + +drop table jssarma_nilzma_bad; + +drop table pokes; +drop table pokes2; +CREATE TABLE pokes (foo INT, bar STRING); +create table pokes2(foo INT, bar STRING); + +create table jssarma_nilzma_bad as select a.val, a.filename, a.offset from (select hash(foo) as val, INPUT__FILE__NAME as filename, BLOCK__OFFSET__INSIDE__FILE as offset from pokes) a join pokes2 b on (a.val = b.foo); + +drop table jssarma_nilzma_bad; + +drop table pokes; +drop table pokes2; +CREATE TABLE pokes (foo INT, bar STRING); +create table pokes2(foo INT, bar STRING); + +create table jssarma_nilzma_bad as select a.val, a.filename, a.offset from (select hash(foo) as val, INPUT__FILE__NAME as filename, BLOCK__OFFSET__INSIDE__FILE as offset from pokes) a join pokes2 b on (a.val = b.foo); + +drop table jssarma_nilzma_bad; + +drop table pokes; +drop table pokes2; diff --git ql/src/test/results/clientpositive/nestedvirtual.q.out ql/src/test/results/clientpositive/nestedvirtual.q.out new file mode 100644 index 0000000..4e6a2ef --- /dev/null +++ ql/src/test/results/clientpositive/nestedvirtual.q.out @@ -0,0 +1,129 @@ +PREHOOK: query: CREATE TABLE pokes (foo INT, bar STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE pokes (foo INT, bar STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@pokes +PREHOOK: query: create table pokes2(foo INT, bar STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table pokes2(foo INT, bar STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@pokes2 +PREHOOK: query: create table jssarma_nilzma_bad as select a.val, a.filename, a.offset from (select hash(foo) as val, INPUT__FILE__NAME as filename, BLOCK__OFFSET__INSIDE__FILE as offset from pokes) a join pokes2 b on (a.val = b.foo) +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@pokes +PREHOOK: Input: default@pokes2 +POSTHOOK: query: create table jssarma_nilzma_bad as select a.val, a.filename, a.offset from (select hash(foo) as val, INPUT__FILE__NAME as filename, BLOCK__OFFSET__INSIDE__FILE as offset from pokes) a join pokes2 b on (a.val = b.foo) +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@pokes +POSTHOOK: Input: default@pokes2 +POSTHOOK: Output: default@jssarma_nilzma_bad +PREHOOK: query: drop table jssarma_nilzma_bad +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@jssarma_nilzma_bad +PREHOOK: Output: default@jssarma_nilzma_bad +POSTHOOK: query: drop table jssarma_nilzma_bad +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@jssarma_nilzma_bad +POSTHOOK: Output: default@jssarma_nilzma_bad +PREHOOK: query: drop table pokes +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@pokes +PREHOOK: Output: default@pokes +POSTHOOK: query: drop table pokes +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@pokes +POSTHOOK: Output: default@pokes +PREHOOK: query: drop table pokes2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@pokes2 +PREHOOK: Output: default@pokes2 +POSTHOOK: query: drop table pokes2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@pokes2 +POSTHOOK: Output: default@pokes2 +PREHOOK: query: CREATE TABLE pokes (foo INT, bar STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE pokes (foo INT, bar STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@pokes +PREHOOK: query: create table pokes2(foo INT, bar STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table pokes2(foo INT, bar STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@pokes2 +PREHOOK: query: create table jssarma_nilzma_bad as select a.val, a.filename, a.offset from (select hash(foo) as val, INPUT__FILE__NAME as filename, BLOCK__OFFSET__INSIDE__FILE as offset from pokes) a join pokes2 b on (a.val = b.foo) +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@pokes +PREHOOK: Input: default@pokes2 +POSTHOOK: query: create table jssarma_nilzma_bad as select a.val, a.filename, a.offset from (select hash(foo) as val, INPUT__FILE__NAME as filename, BLOCK__OFFSET__INSIDE__FILE as offset from pokes) a join pokes2 b on (a.val = b.foo) +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@pokes +POSTHOOK: Input: default@pokes2 +POSTHOOK: Output: default@jssarma_nilzma_bad +PREHOOK: query: drop table jssarma_nilzma_bad +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@jssarma_nilzma_bad +PREHOOK: Output: default@jssarma_nilzma_bad +POSTHOOK: query: drop table jssarma_nilzma_bad +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@jssarma_nilzma_bad +POSTHOOK: Output: default@jssarma_nilzma_bad +PREHOOK: query: drop table pokes +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@pokes +PREHOOK: Output: default@pokes +POSTHOOK: query: drop table pokes +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@pokes +POSTHOOK: Output: default@pokes +PREHOOK: query: drop table pokes2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@pokes2 +PREHOOK: Output: default@pokes2 +POSTHOOK: query: drop table pokes2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@pokes2 +POSTHOOK: Output: default@pokes2 +PREHOOK: query: CREATE TABLE pokes (foo INT, bar STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE pokes (foo INT, bar STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@pokes +PREHOOK: query: create table pokes2(foo INT, bar STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table pokes2(foo INT, bar STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@pokes2 +PREHOOK: query: create table jssarma_nilzma_bad as select a.val, a.filename, a.offset from (select hash(foo) as val, INPUT__FILE__NAME as filename, BLOCK__OFFSET__INSIDE__FILE as offset from pokes) a join pokes2 b on (a.val = b.foo) +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@pokes +PREHOOK: Input: default@pokes2 +POSTHOOK: query: create table jssarma_nilzma_bad as select a.val, a.filename, a.offset from (select hash(foo) as val, INPUT__FILE__NAME as filename, BLOCK__OFFSET__INSIDE__FILE as offset from pokes) a join pokes2 b on (a.val = b.foo) +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@pokes +POSTHOOK: Input: default@pokes2 +POSTHOOK: Output: default@jssarma_nilzma_bad +PREHOOK: query: drop table jssarma_nilzma_bad +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@jssarma_nilzma_bad +PREHOOK: Output: default@jssarma_nilzma_bad +POSTHOOK: query: drop table jssarma_nilzma_bad +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@jssarma_nilzma_bad +POSTHOOK: Output: default@jssarma_nilzma_bad +PREHOOK: query: drop table pokes +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@pokes +PREHOOK: Output: default@pokes +POSTHOOK: query: drop table pokes +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@pokes +POSTHOOK: Output: default@pokes +PREHOOK: query: drop table pokes2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@pokes2 +PREHOOK: Output: default@pokes2 +POSTHOOK: query: drop table pokes2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@pokes2 +POSTHOOK: Output: default@pokes2 -- 1.7.4.4