Index: hbase-handler/src/test/results/hbase_joins.q.out =================================================================== --- hbase-handler/src/test/results/hbase_joins.q.out (revision 1057310) +++ hbase-handler/src/test/results/hbase_joins.q.out (working copy) @@ -10,6 +10,10 @@ PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE countries POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE users_level +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE users_level +POSTHOOK: type: DROPTABLE PREHOOK: query: -- From HIVE-1257 CREATE TABLE users(key string, state string, country string, country_id int) @@ -88,102 +92,102 @@ PREHOOK: type: QUERY PREHOOK: Input: default@countries PREHOOK: Input: default@users -PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-44_025_3464030805185795112/-mr-10000 +PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-29_136_2919119166696342265/-mr-10000 POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c ON (u.country = c.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@countries POSTHOOK: Input: default@users -POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-44_025_3464030805185795112/-mr-10000 +POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-29_136_2919119166696342265/-mr-10000 user1 USA United States USA PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c ON (u.country = c.country) PREHOOK: type: QUERY PREHOOK: Input: default@countries PREHOOK: Input: default@users -PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-49_449_2533239955498825412/-mr-10000 +PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-38_418_1418937364423533875/-mr-10000 POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c ON (u.country = c.country) POSTHOOK: type: QUERY POSTHOOK: Input: 
default@countries POSTHOOK: Input: default@users -POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-49_449_2533239955498825412/-mr-10000 +POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-38_418_1418937364423533875/-mr-10000 user1 USA United States USA PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c ON (u.country_id = c.country_id) PREHOOK: type: QUERY PREHOOK: Input: default@countries PREHOOK: Input: default@users -PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-54_460_9134325599532847572/-mr-10000 +PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-47_279_1891102438076444084/-mr-10000 POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c ON (u.country_id = c.country_id) POSTHOOK: type: QUERY POSTHOOK: Input: default@countries POSTHOOK: Input: default@users -POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-54_460_9134325599532847572/-mr-10000 +POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-47_279_1891102438076444084/-mr-10000 PREHOOK: query: SELECT u.key, u.state, s.name FROM users u JOIN states s ON (u.state = s.key) PREHOOK: type: QUERY PREHOOK: Input: default@states PREHOOK: Input: default@users -PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-59_427_3646437485215925564/-mr-10000 +PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-54_306_2919915084551749896/-mr-10000 POSTHOOK: query: SELECT u.key, u.state, s.name FROM users u JOIN states s ON (u.state = s.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@states POSTHOOK: Input: default@users -POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-59_427_3646437485215925564/-mr-10000 +POSTHOOK: Output: 
file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-54_306_2919915084551749896/-mr-10000 user1 IA Iowa PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c ON (u.country = c.key) PREHOOK: type: QUERY PREHOOK: Input: default@countries PREHOOK: Input: default@users -PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-03_357_736778343063311968/-mr-10000 +PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-03_810_1067780128697572780/-mr-10000 POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c ON (u.country = c.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@countries POSTHOOK: Input: default@users -POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-03_357_736778343063311968/-mr-10000 +POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-03_810_1067780128697572780/-mr-10000 user1 USA United States USA PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c ON (u.country = c.country) PREHOOK: type: QUERY PREHOOK: Input: default@countries PREHOOK: Input: default@users -PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-08_313_7684989920596569472/-mr-10000 +PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-14_736_8923692779050900406/-mr-10000 POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c ON (u.country = c.country) POSTHOOK: type: QUERY POSTHOOK: Input: default@countries POSTHOOK: Input: default@users -POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-08_313_7684989920596569472/-mr-10000 +POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-14_736_8923692779050900406/-mr-10000 user1 USA United States USA PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c 
ON (u.country_id = c.country_id) PREHOOK: type: QUERY PREHOOK: Input: default@countries PREHOOK: Input: default@users -PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-13_256_4291980393265625395/-mr-10000 +PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-22_235_696090037944243521/-mr-10000 POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c ON (u.country_id = c.country_id) POSTHOOK: type: QUERY POSTHOOK: Input: default@countries POSTHOOK: Input: default@users -POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-13_256_4291980393265625395/-mr-10000 +POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-22_235_696090037944243521/-mr-10000 PREHOOK: query: SELECT u.key, u.state, s.name FROM users u JOIN states s ON (u.state = s.key) PREHOOK: type: QUERY PREHOOK: Input: default@states PREHOOK: Input: default@users -PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-18_206_1231084557369200625/-mr-10000 +PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-28_951_5386570432365997648/-mr-10000 POSTHOOK: query: SELECT u.key, u.state, s.name FROM users u JOIN states s ON (u.state = s.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@states POSTHOOK: Input: default@users -POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-18_206_1231084557369200625/-mr-10000 +POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-28_951_5386570432365997648/-mr-10000 user1 IA Iowa PREHOOK: query: DROP TABLE users PREHOOK: type: DROPTABLE @@ -209,3 +213,55 @@ POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@countries POSTHOOK: Output: default@countries +PREHOOK: query: CREATE TABLE users(key int, userid int, username string, created int) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES 
("hbase.columns.mapping" = ":key,f:userid,f:nickname,f:created") +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE users(key int, userid int, username string, created int) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f:userid,f:nickname,f:created") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@users +PREHOOK: query: CREATE TABLE users_level(key int, userid int, level int) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f:userid,f:level") +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE users_level(key int, userid int, level int) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f:userid,f:level") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@users_level +PREHOOK: query: -- HIVE-1903: the problem fixed here showed up even without any data, +-- so no need to load any to test it +SELECT year(from_unixtime(users.created)) AS year, level, count(users.userid) AS num + FROM users JOIN users_level ON (users.userid = users_level.userid) + GROUP BY year(from_unixtime(users.created)), level +PREHOOK: type: QUERY +PREHOOK: Input: default@users +PREHOOK: Input: default@users_level +PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-47_957_2665969936395506719/-mr-10000 +POSTHOOK: query: -- HIVE-1903: the problem fixed here showed up even without any data, +-- so no need to load any to test it +SELECT year(from_unixtime(users.created)) AS year, level, count(users.userid) AS num + FROM users JOIN users_level ON (users.userid = users_level.userid) + GROUP BY year(from_unixtime(users.created)), level +POSTHOOK: type: QUERY +POSTHOOK: Input: default@users +POSTHOOK: Input: default@users_level +POSTHOOK: Output: 
file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-47_957_2665969936395506719/-mr-10000 +PREHOOK: query: DROP TABLE users +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@users +PREHOOK: Output: default@users +POSTHOOK: query: DROP TABLE users +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@users +POSTHOOK: Output: default@users +PREHOOK: query: DROP TABLE users_level +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@users_level +PREHOOK: Output: default@users_level +POSTHOOK: query: DROP TABLE users_level +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@users_level +POSTHOOK: Output: default@users_level Index: hbase-handler/src/test/queries/hbase_joins.q =================================================================== --- hbase-handler/src/test/queries/hbase_joins.q (revision 1057310) +++ hbase-handler/src/test/queries/hbase_joins.q (working copy) @@ -1,6 +1,7 @@ DROP TABLE users; DROP TABLE states; DROP TABLE countries; +DROP TABLE users_level; -- From HIVE-1257 @@ -62,3 +63,20 @@ DROP TABLE users; DROP TABLE states; DROP TABLE countries; + +CREATE TABLE users(key int, userid int, username string, created int) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f:userid,f:nickname,f:created"); + +CREATE TABLE users_level(key int, userid int, level int) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f:userid,f:level"); + +-- HIVE-1903: the problem fixed here showed up even without any data, +-- so no need to load any to test it +SELECT year(from_unixtime(users.created)) AS year, level, count(users.userid) AS num + FROM users JOIN users_level ON (users.userid = users_level.userid) + GROUP BY year(from_unixtime(users.created)), level; + +DROP TABLE users; +DROP TABLE users_level; Index: ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java 
=================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (revision 1057310) +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (working copy) @@ -217,17 +217,23 @@ // clone a jobConf for setting needed columns for reading JobConf cloneJobConf = new JobConf(job); - pushProjectionsAndFilters(cloneJobConf, inputFormatClass, hsplit.getPath() - .toString(), hsplit.getPath().toUri().getPath()); - InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, - cloneJobConf); + if (this.mrwork == null) { + init(job); + } + boolean nonNative = false; PartitionDesc part = pathToPartitionInfo.get(hsplit.getPath().toString()); if ((part != null) && (part.getTableDesc() != null)) { Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf); + nonNative = part.getTableDesc().isNonNative(); } + pushProjectionsAndFilters(cloneJobConf, inputFormatClass, hsplit.getPath() + .toString(), hsplit.getPath().toUri().getPath(), nonNative); + + InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, + cloneJobConf); RecordReader innerReader = inputFormat.getRecordReader(inputSplit, cloneJobConf, reporter); @@ -356,6 +362,12 @@ protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass, String splitPath, String splitPathWithNoSchema) { + pushProjectionsAndFilters(jobConf, inputFormatClass, splitPath, + splitPathWithNoSchema, false); + } + + protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass, + String splitPath, String splitPathWithNoSchema, boolean nonNative) { if (this.mrwork == null) { init(job); } @@ -367,7 +379,22 @@ while (iterator.hasNext()) { Entry<String, ArrayList<String>> entry = iterator.next(); String key = entry.getKey(); - if (splitPath.startsWith(key) || splitPathWithNoSchema.startsWith(key)) { + boolean match; + if (nonNative) { + // For non-native tables, we need to do an exact match to avoid + // HIVE-1903. 
(The table location contains no files, and the string + // representation of its path does not have a trailing slash.) + match = + splitPath.equals(key) || splitPathWithNoSchema.equals(key); + } else { + // But for native tables, we need to do a prefix match for + // subdirectories. (Unlike non-native tables, prefix mixups don't seem + // to be a potential problem here since we are always dealing with the + // path to something deeper than the table location.) + match = + splitPath.startsWith(key) || splitPathWithNoSchema.startsWith(key); + } + if (match) { ArrayList<String> list = entry.getValue(); for (String val : list) { aliases.add(val);