diff --git ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index 9d5730d..6d7b493 100755 --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -22,11 +22,7 @@ import java.io.DataOutput; import java.io.IOException; import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.Map.Entry; @@ -526,8 +522,20 @@ protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass // push down projections. ColumnProjectionUtils.appendReadColumns( jobConf, ts.getNeededColumnIDs(), ts.getNeededColumns()); - // push down filters - pushFilters(jobConf, ts); + } + } + + // HIVE-10792: only push filters when there is exactly one alias. + // Consider the following SQL, + // select * from test_orc t1 + // left outer join test_orc t2 on (t1.c0=t2.c0 and t2.c1=0); + // In Map phase, "test_orc" is scanned only once using OrcInputFormat. For + // each row, MapOperator will forward it to its two child TableScanOperators. + // If "c1=0" is pushed down to OrcInputFormat, t1 will not receive all rows. + if (aliases.size() == 1) { + Operator op = mrwork.getAliasToWork().get(aliases.get(0)); + if (op instanceof TableScanOperator) { + pushFilters(jobConf, (TableScanOperator) op); } } }