From 95be0d6f2e7c5c9b6d705acf15f397daf732ba63 Mon Sep 17 00:00:00 2001 From: Syed Albiz Date: Sat, 11 Jun 2011 20:59:40 -0700 Subject: [PATCH 1/1] HIVE-2100 fix by checking for operator initialization instead of excluding from OpCtxMap diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java index 96a1631..86f4e13 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java @@ -362,18 +362,16 @@ public class MapOperator extends Operator implements Serializable { for (String onealias : aliases) { Operator op = conf.getAliasToWork().get( - onealias); + onealias); LOG.info("Adding alias " + onealias + " to work list for file " - + onefile); + + onefile); MapInputPath inp = new MapInputPath(onefile, onealias, op); opCtxMap.put(inp, opCtx); if (operatorToPaths.get(op) == null) { operatorToPaths.put(op, new java.util.ArrayList()); } operatorToPaths.get(op).add(onefile); - - op - .setParentOperators(new ArrayList>()); + op.setParentOperators(new ArrayList>()); op.getParentOperators().add(this); // check for the operators who will process rows coming to this Map // Operator @@ -389,6 +387,7 @@ public class MapOperator extends Operator implements Serializable { } } } + if (children.size() == 0) { // didn't find match for input file path in configuration! // serious problem .. @@ -409,6 +408,8 @@ public class MapOperator extends Operator implements Serializable { // set that parent initialization is done and call initialize on children state = State.INIT; List> children = getChildOperators(); + Path fpath = new Path((new Path(HiveConf.getVar(hconf, + HiveConf.ConfVars.HADOOPMAPFILENAME))).toUri().getPath()); for (Entry entry : opCtxMap.entrySet()) { // Add alias, table name, and partitions to hadoop conf so that their @@ -441,6 +442,16 @@ public class MapOperator extends Operator implements Serializable { } } if (shouldInit) { + shouldInit = false; + for (String path : paths) { + Path onepath = new Path(new Path(path).toUri().getPath()); + if (!onepath.toUri().relativize(fpath.toUri()).equals(fpath.toUri())) { + shouldInit = true; + break; + } + } + } + if (shouldInit) { op.initialize(hconf, new ObjectInspector[] {entry.getValue().getRowObjectInspector()}); } } diff --git ql/src/test/queries/clientpositive/nestedvirtual.q ql/src/test/queries/clientpositive/nestedvirtual.q new file mode 100644 index 0000000..7b80941 --- /dev/null +++ ql/src/test/queries/clientpositive/nestedvirtual.q @@ -0,0 +1,27 @@ +CREATE TABLE pokes (foo INT, bar STRING); +create table pokes2(foo INT, bar STRING); + +create table jssarma_nilzma_bad as select a.val, a.filename, a.offset from (select hash(foo) as val, INPUT__FILE__NAME as filename, BLOCK__OFFSET__INSIDE__FILE as offset from pokes) a join pokes2 b on (a.val = b.foo); + +drop table jssarma_nilzma_bad; + +drop table pokes; +drop table pokes2; +CREATE TABLE pokes (foo INT, bar STRING); +create table pokes2(foo INT, bar STRING); + +create table jssarma_nilzma_bad as select a.val, a.filename, a.offset from (select hash(foo) as val, INPUT__FILE__NAME as filename, BLOCK__OFFSET__INSIDE__FILE as offset from pokes) a join pokes2 b on (a.val = b.foo); + +drop table jssarma_nilzma_bad; + +drop table pokes; +drop table pokes2; +CREATE TABLE pokes (foo INT, bar STRING); +create table pokes2(foo INT, bar STRING); + +create table jssarma_nilzma_bad as select a.val, a.filename, a.offset from (select hash(foo) as val, INPUT__FILE__NAME as filename, BLOCK__OFFSET__INSIDE__FILE as offset from pokes) a join pokes2 b on (a.val = b.foo); + +drop table jssarma_nilzma_bad; + +drop table pokes; +drop table pokes2; diff --git ql/src/test/results/clientpositive/nestedvirtual.q.out ql/src/test/results/clientpositive/nestedvirtual.q.out new file mode 100644 index 0000000..4e6a2ef --- /dev/null +++ ql/src/test/results/clientpositive/nestedvirtual.q.out @@ -0,0 +1,129 @@ +PREHOOK: query: CREATE TABLE pokes (foo INT, bar STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE pokes (foo INT, bar STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@pokes +PREHOOK: query: create table pokes2(foo INT, bar STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table pokes2(foo INT, bar STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@pokes2 +PREHOOK: query: create table jssarma_nilzma_bad as select a.val, a.filename, a.offset from (select hash(foo) as val, INPUT__FILE__NAME as filename, BLOCK__OFFSET__INSIDE__FILE as offset from pokes) a join pokes2 b on (a.val = b.foo) +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@pokes +PREHOOK: Input: default@pokes2 +POSTHOOK: query: create table jssarma_nilzma_bad as select a.val, a.filename, a.offset from (select hash(foo) as val, INPUT__FILE__NAME as filename, BLOCK__OFFSET__INSIDE__FILE as offset from pokes) a join pokes2 b on (a.val = b.foo) +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@pokes +POSTHOOK: Input: default@pokes2 +POSTHOOK: Output: default@jssarma_nilzma_bad +PREHOOK: query: drop table jssarma_nilzma_bad +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@jssarma_nilzma_bad +PREHOOK: Output: default@jssarma_nilzma_bad +POSTHOOK: query: drop table jssarma_nilzma_bad +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@jssarma_nilzma_bad +POSTHOOK: Output: default@jssarma_nilzma_bad +PREHOOK: query: drop table pokes +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@pokes +PREHOOK: Output: default@pokes +POSTHOOK: query: drop table pokes +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@pokes +POSTHOOK: Output: default@pokes +PREHOOK: query: drop table pokes2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@pokes2 +PREHOOK: Output: default@pokes2 +POSTHOOK: query: drop table pokes2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@pokes2 +POSTHOOK: Output: default@pokes2 +PREHOOK: query: CREATE TABLE pokes (foo INT, bar STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE pokes (foo INT, bar STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@pokes +PREHOOK: query: create table pokes2(foo INT, bar STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table pokes2(foo INT, bar STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@pokes2 +PREHOOK: query: create table jssarma_nilzma_bad as select a.val, a.filename, a.offset from (select hash(foo) as val, INPUT__FILE__NAME as filename, BLOCK__OFFSET__INSIDE__FILE as offset from pokes) a join pokes2 b on (a.val = b.foo) +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@pokes +PREHOOK: Input: default@pokes2 +POSTHOOK: query: create table jssarma_nilzma_bad as select a.val, a.filename, a.offset from (select hash(foo) as val, INPUT__FILE__NAME as filename, BLOCK__OFFSET__INSIDE__FILE as offset from pokes) a join pokes2 b on (a.val = b.foo) +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@pokes +POSTHOOK: Input: default@pokes2 +POSTHOOK: Output: default@jssarma_nilzma_bad +PREHOOK: query: drop table jssarma_nilzma_bad +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@jssarma_nilzma_bad +PREHOOK: Output: default@jssarma_nilzma_bad +POSTHOOK: query: drop table jssarma_nilzma_bad +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@jssarma_nilzma_bad +POSTHOOK: Output: default@jssarma_nilzma_bad +PREHOOK: query: drop table pokes +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@pokes +PREHOOK: Output: default@pokes +POSTHOOK: query: drop table pokes +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@pokes +POSTHOOK: Output: default@pokes +PREHOOK: query: drop table pokes2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@pokes2 +PREHOOK: Output: default@pokes2 +POSTHOOK: query: drop table pokes2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@pokes2 +POSTHOOK: Output: default@pokes2 +PREHOOK: query: CREATE TABLE pokes (foo INT, bar STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE pokes (foo INT, bar STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@pokes +PREHOOK: query: create table pokes2(foo INT, bar STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table pokes2(foo INT, bar STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@pokes2 +PREHOOK: query: create table jssarma_nilzma_bad as select a.val, a.filename, a.offset from (select hash(foo) as val, INPUT__FILE__NAME as filename, BLOCK__OFFSET__INSIDE__FILE as offset from pokes) a join pokes2 b on (a.val = b.foo) +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@pokes +PREHOOK: Input: default@pokes2 +POSTHOOK: query: create table jssarma_nilzma_bad as select a.val, a.filename, a.offset from (select hash(foo) as val, INPUT__FILE__NAME as filename, BLOCK__OFFSET__INSIDE__FILE as offset from pokes) a join pokes2 b on (a.val = b.foo) +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@pokes +POSTHOOK: Input: default@pokes2 +POSTHOOK: Output: default@jssarma_nilzma_bad +PREHOOK: query: drop table jssarma_nilzma_bad +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@jssarma_nilzma_bad +PREHOOK: Output: default@jssarma_nilzma_bad +POSTHOOK: query: drop table jssarma_nilzma_bad +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@jssarma_nilzma_bad +POSTHOOK: Output: default@jssarma_nilzma_bad +PREHOOK: query: drop table pokes +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@pokes +PREHOOK: Output: default@pokes +POSTHOOK: query: drop table pokes +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@pokes +POSTHOOK: Output: default@pokes +PREHOOK: query: drop table pokes2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@pokes2 +PREHOOK: Output: default@pokes2 +POSTHOOK: query: drop table pokes2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@pokes2 +POSTHOOK: Output: default@pokes2 -- 1.7.4.4