Index: ql/src/test/results/clientpositive/cbo_correctness.q.out
===================================================================
--- ql/src/test/results/clientpositive/cbo_correctness.q.out (revision 1618312)
+++ ql/src/test/results/clientpositive/cbo_correctness.q.out (working copy)
@@ -15789,10 +15789,10 @@
 1.0 1 1.0 1 2 1.0 2.0 1.0 2.0 1 2
 NULL NULL NULL NULL 0 NULL 0.0 NULL NULL NULL NULL
 NULL NULL NULL NULL 0 NULL 0.0 NULL NULL NULL NULL
-PREHOOK: query: create view v1 as select c_int, value, c_boolean from t1
+PREHOOK: query: create view v1 as select c_int, value, c_boolean, dt from t1
 PREHOOK: type: CREATEVIEW
 PREHOOK: Input: default@t1
-POSTHOOK: query: create view v1 as select c_int, value, c_boolean from t1
+POSTHOOK: query: create view v1 as select c_int, value, c_boolean, dt from t1
 POSTHOOK: type: CREATEVIEW
 POSTHOOK: Input: default@t1
 POSTHOOK: Output: default@v1
@@ -15963,8 +15963,8 @@
 POSTHOOK: Input: default@v1
 #### A masked pattern was here ####
 2
-PREHOOK: query: with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int),
-q2 as ( select c_int,c_boolean from v1 where value = '1')
+PREHOOK: query: with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int and t1.dt='2014'),
+q2 as ( select c_int,c_boolean from v1 where value = '1' or dt = '14')
 select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
@@ -15972,8 +15972,8 @@
 PREHOOK: Input: default@v1
 PREHOOK: Input: default@v4
 #### A masked pattern was here ####
-POSTHOOK: query: with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int),
-q2 as ( select c_int,c_boolean from v1 where value = '1')
+POSTHOOK: query: with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int and t1.dt='2014'),
+q2 as ( select c_int,c_boolean from v1 where value = '1' or dt = '14')
 select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
Index: ql/src/test/queries/clientpositive/cbo_correctness.q
===================================================================
--- ql/src/test/queries/clientpositive/cbo_correctness.q (revision 1618312)
+++ ql/src/test/queries/clientpositive/cbo_correctness.q (working copy)
@@ -175,7 +175,7 @@
 select * from (select max(c_int) over (partition by key order by value Rows UNBOUNDED PRECEDING), min(c_int) over (partition by key order by value rows current row), count(c_int) over(partition by key order by value ROWS 1 PRECEDING), avg(value) over (partition by key order by value Rows between unbounded preceding and unbounded following), sum(value) over (partition by key order by value rows between unbounded preceding and current row), avg(c_float) over (partition by key order by value Rows between 1 preceding and unbounded following), sum(c_float) over (partition by key order by value rows between 1 preceding and current row), max(c_float) over (partition by key order by value rows between 1 preceding and unbounded following), min(c_float) over (partition by key order by value rows between 1 preceding and 1 following) from t1) t1;
 select i, a, h, b, c, d, e, f, g, a as x, a +1 as y from (select max(c_int) over (partition by key order by value range UNBOUNDED PRECEDING) a, min(c_int) over (partition by key order by value range current row) b, count(c_int) over(partition by key order by value range 1 PRECEDING) c, avg(value) over (partition by key order by value range between unbounded preceding and unbounded following) d, sum(value) over (partition by key order by value range between unbounded preceding and current row) e, avg(c_float) over (partition by key order by value range between 1 preceding and unbounded following) f, sum(c_float) over (partition by key order by value range between 1 preceding and current row) g, max(c_float) over (partition by key order by value range between 1 preceding and unbounded following) h, min(c_float) over (partition by key order by value range between 1 preceding and 1 following) i from t1) t1;
 
-create view v1 as select c_int, value, c_boolean from t1;
+create view v1 as select c_int, value, c_boolean, dt from t1;
 create view v2 as select c_int, value from t2;
 
 select value from v1 where c_boolean=false;
@@ -205,8 +205,8 @@
 q2 as ( select c_int,c_boolean from v1 where value = '1')
 select sum(c_int) from (select c_int from q1) a;
 
-with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int),
-q2 as ( select c_int,c_boolean from v1 where value = '1')
+with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int and t1.dt='2014'),
+q2 as ( select c_int,c_boolean from v1 where value = '1' or dt = '14')
 select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int;
 
 
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java (revision 1618312)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java (working copy)
@@ -13,9 +13,7 @@
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.optiq.translator.ExprNodeConverter;
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
@@ -98,8 +96,11 @@
   @Override
   public double getRowCount() {
     if (m_rowCount == -1) {
-      if (m_hiveTblMetadata.isPartitioned()) {
+      if (null == partitionList) {
+        // we get here for an unpartitioned table, or a partitioned table with no pruning predicates
         computePartitionList(m_hiveConf, null);
+      }
+      if (m_hiveTblMetadata.isPartitioned()) {
         List<Long> rowCounts = StatsUtils.getBasicStatForPartitions(
             m_hiveTblMetadata, partitionList.getNotDeniedPartns(),
             StatsSetupConst.ROW_COUNT);
@@ -132,15 +133,9 @@
   }
 
   public void computePartitionList(HiveConf conf, RexNode pruneNode) {
-    partitionList = null;
-    if (!m_hiveTblMetadata.isPartitioned()) {
-      // no partitions for unpartitioned tables.
-      return;
-    }
-
     try {
-      if (pruneNode == null || InputFinder.bits(pruneNode).length() == 0 ) {
+      if (!m_hiveTblMetadata.isPartitioned() || pruneNode == null || InputFinder.bits(pruneNode).length() == 0 ) {
         // there is no predicate on partitioning column, we need all partitions in this case.
         partitionList = PartitionPruner.prune(m_hiveTblMetadata, null, conf,
             getName(), partitionCache);
         return;
@@ -187,12 +182,11 @@
 
     if (null == partitionList) {
       // We could be here either because its an unpartitioned table or because
-      // there are no pruning predicates on a partitioned table. If its latter,
-      // we need to fetch all partitions, so do that now.
+      // there are no pruning predicates on a partitioned table.
       computePartitionList(m_hiveConf, null);
     }
 
-    if (partitionList == null) {
+    if (!m_hiveTblMetadata.isPartitioned()) {
       // 2.1 Handle the case for unpartitioned table.
       hiveColStats = StatsUtils.getTableColumnStats(m_hiveTblMetadata,
           m_hiveNonPartitionCols, nonPartColNamesThatRqrStats);
@@ -290,14 +284,11 @@
   /*
    * use to check if a set of columns are all partition columns.
    * true only if:
-   * - there is a prunedPartList in place
    * - all columns in BitSet are partition
    * columns.
    */
   public boolean containsPartitionColumnsOnly(BitSet cols) {
-    if (partitionList == null) {
-      return false;
-    }
+
     for (int i = cols.nextSetBit(0); i >= 0; i++, i = cols.nextSetBit(i + 1)) {
       if (!m_hivePartitionColsMap.containsKey(i)) {
         return false;
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RexNodeConverter.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RexNodeConverter.java (revision 1618312)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RexNodeConverter.java (working copy)
@@ -275,17 +275,6 @@
     return optiqLiteral;
   }
 
-  public static RexNode getAlwaysTruePredicate(RelOptCluster cluster) {
-    RelDataType dt = cluster.getTypeFactory().createSqlType(SqlTypeName.BOOLEAN);
-    SqlOperator optiqOp = SqlFunctionConverter.getOptiqOperator(new GenericUDFOPEqual(),
-        ImmutableList.<RelDataType> of(dt), dt);
-    List<RexNode> childRexNodeLst = new LinkedList<RexNode>();
-    childRexNodeLst.add(cluster.getRexBuilder().makeLiteral(true));
-    childRexNodeLst.add(cluster.getRexBuilder().makeLiteral(true));
-
-    return cluster.getRexBuilder().makeCall(optiqOp, childRexNodeLst);
-  }
-
   public static RexNode convert(RelOptCluster cluster, ExprNodeDesc joinCondnExprNode,
       List<RelNode> inputRels, LinkedHashMap<RelNode, RowResolver> relToHiveRR,
       Map<RelNode, ImmutableMap<String, Integer>> relToHiveColNameOptiqPosMap, boolean flattenExpr)
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java (revision 1618312)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java (working copy)
@@ -231,9 +231,7 @@
   }
 
   private void removeAlwaysTruePredicates(List<RexNode> predicates) {
-    if (predicates.size() < 2) {
-      return;
-    }
+
     ListIterator<RexNode> iter = predicates.listIterator();
     while (iter.hasNext()) {
       RexNode exp = iter.next();
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (revision 1618312)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (working copy)
@@ -159,26 +159,69 @@
   public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr,
       HiveConf conf, String alias, Map<String, PrunedPartitionList> prunedPartitionsMap)
       throws SemanticException {
+    LOG.trace("Started pruning partition");
     LOG.trace("dbname = " + tab.getDbName());
     LOG.trace("tabname = " + tab.getTableName());
-//    LOG.trace("prune Expression = " + prunerExpr);
+    LOG.trace("prune Expression = " + (prunerExpr == null ? "" : prunerExpr));
+    if ("strict".equalsIgnoreCase(HiveConf.getVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE))
+        && !hasColumnExpr(prunerExpr)) {
+      // If the "strict" mode is on, we have to provide partition pruner for each table.
+      throw new SemanticException(ErrorMsg.NO_PARTITION_PREDICATE
+          .getMsg("for Alias \"" + alias + "\" Table \"" + tab.getTableName() + "\""));
+    }
+
     String key = tab.getDbName() + "." + tab.getTableName() + ";";
-    if (prunerExpr != null) {
-      key = key + prunerExpr.getExprString();
+    if (!tab.isPartitioned() || prunerExpr == null) {
+      // If the table is not partitioned, return everything.
+      // Or, non-strict mode and there are no predicates at all - get everything.
+      return getAllPartsFromCacheOrServer(tab, key, false, prunedPartitionsMap);
     }
-    PrunedPartitionList ret = prunedPartitionsMap.get(key);
-    if (ret != null) {
-      return ret;
+
+    Set<String> partColsUsedInFilter = new LinkedHashSet<String>();
+    // Replace virtual columns with nulls. See javadoc for details.
+    prunerExpr = removeNonPartCols(prunerExpr, extractPartColNames(tab), partColsUsedInFilter);
+    // Remove all parts that are not partition columns. See javadoc for details.
+    ExprNodeGenericFuncDesc compactExpr = (ExprNodeGenericFuncDesc)compactExpr(prunerExpr.clone());
+    String oldFilter = prunerExpr.getExprString();
+    if (compactExpr == null) {
+      // Non-strict mode, and all the predicates are on non-partition columns - get everything.
+      LOG.debug("Filter " + oldFilter + " was null after compacting");
+      return getAllPartsFromCacheOrServer(tab, key, true, prunedPartitionsMap);
     }
+    LOG.debug("Filter w/ compacting: " + compactExpr.getExprString()
+        + "; filter w/o compacting: " + oldFilter);
-    ret = getPartitionsFromServer(tab, prunerExpr, conf, alias);
-    prunedPartitionsMap.put(key, ret);
-    return ret;
+    key = key + compactExpr.getExprString();
+    PrunedPartitionList ppList = prunedPartitionsMap.get(key);
+    if (ppList != null) {
+      return ppList;
   }
+    ppList = getPartitionsFromServer(tab, compactExpr, conf, alias, partColsUsedInFilter, oldFilter.equals(compactExpr.getExprString()));
+    prunedPartitionsMap.put(key, ppList);
+    return ppList;
+  }
+
+  private static PrunedPartitionList getAllPartsFromCacheOrServer(Table tab, String key, boolean unknownPartitions,
+      Map<String, PrunedPartitionList> partsCache) throws SemanticException {
+    PrunedPartitionList ppList = partsCache.get(key);
+    if (ppList != null) {
+      return ppList;
+    }
+    Set<Partition> parts;
+    try {
+      parts = getAllPartitions(tab);
+    } catch (HiveException e) {
+      throw new SemanticException(e);
+    }
+    ppList = new PrunedPartitionList(tab, parts, null, unknownPartitions);
+    partsCache.put(key, ppList);
+    return ppList;
+  }
+
   private static ExprNodeDesc removeTruePredciates(ExprNodeDesc e) {
     if (e instanceof ExprNodeConstantDesc) {
       ExprNodeConstantDesc eC = (ExprNodeConstantDesc) e;
@@ -281,41 +324,9 @@
   }
 
   private static PrunedPartitionList getPartitionsFromServer(Table tab,
-      ExprNodeDesc prunerExpr, HiveConf conf, String alias) throws SemanticException {
+      final ExprNodeGenericFuncDesc compactExpr, HiveConf conf, String alias, Set<String> partColsUsedInFilter, boolean isPruningByExactFilter) throws SemanticException {
     try {
-      if (!tab.isPartitioned()) {
-        // If the table is not partitioned, return everything.
-        return new PrunedPartitionList(tab, getAllPartitions(tab), null, false);
-      }
-      LOG.debug("tabname = " + tab.getTableName() + " is partitioned");
-
-      if ("strict".equalsIgnoreCase(HiveConf.getVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE))
-          && !hasColumnExpr(prunerExpr)) {
-        // If the "strict" mode is on, we have to provide partition pruner for each table.
-        throw new SemanticException(ErrorMsg.NO_PARTITION_PREDICATE
-            .getMsg("for Alias \"" + alias + "\" Table \"" + tab.getTableName() + "\""));
-      }
-
-      if (prunerExpr == null) {
-        // Non-strict mode, and there is no predicates at all - get everything.
-        return new PrunedPartitionList(tab, getAllPartitions(tab), null, false);
-      }
-
-      Set<String> referred = new LinkedHashSet<String>();
-      // Replace virtual columns with nulls. See javadoc for details.
-      prunerExpr = removeNonPartCols(prunerExpr, extractPartColNames(tab), referred);
-      // Remove all parts that are not partition columns. See javadoc for details.
-      ExprNodeGenericFuncDesc compactExpr = (ExprNodeGenericFuncDesc)compactExpr(prunerExpr.clone());
-      String oldFilter = prunerExpr.getExprString();
-      if (compactExpr == null) {
-        // Non-strict mode, and all the predicates are on non-partition columns - get everything.
-        LOG.debug("Filter " + oldFilter + " was null after compacting");
-        return new PrunedPartitionList(tab, getAllPartitions(tab), null, true);
-      }
-
-      LOG.debug("Filter w/ compacting: " + compactExpr.getExprString()
-          + "; filter w/o compacting: " + oldFilter);
-
       // Finally, check the filter for non-built-in UDFs. If these are present, we cannot
       // do filtering on the server, and have to fall back to client path.
       boolean doEvalClientSide = hasUserFunctions(compactExpr);
@@ -344,9 +355,8 @@
       // The partitions are "unknown" if the call says so due to the expression
       // evaluator returning null for a partition, or if we sent a partial expression to
       // metastore and so some partitions may have no data based on other filters.
-      boolean isPruningByExactFilter = oldFilter.equals(compactExpr.getExprString());
       return new PrunedPartitionList(tab, new LinkedHashSet<Partition>(partitions),
-          new ArrayList<String>(referred),
+          new ArrayList<String>(partColsUsedInFilter),
           hasUnknownPartitions || !isPruningByExactFilter);
     } catch (SemanticException e) {
       throw e;
Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java (revision 1618312)
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java (working copy)
@@ -25,6 +25,7 @@
 import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -93,7 +94,7 @@
       return "null";
     }
 
-    if (typeInfo.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
+    if (typeInfo.getTypeName().equals(serdeConstants.STRING_TYPE_NAME) || typeInfo instanceof BaseCharTypeInfo) {
       return "'" + value.toString() + "'";
     } else if (typeInfo.getTypeName().equals(serdeConstants.BINARY_TYPE_NAME)) {
       byte[] bytes = (byte[]) value;
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1618312)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy)
@@ -411,7 +411,6 @@
     smbMapJoinContext.clear();
     opParseCtx.clear();
     groupOpToInputTables.clear();
-    prunedPartitions.clear();
    disableJoinMerge = false;
     aliasToCTEs.clear();
     topToTable.clear();
@@ -9583,6 +9582,7 @@
         throw new RuntimeException(
             "Couldn't do phase1 on CBO optimized query plan");
       }
+      prunedPartitions = ImmutableMap.copyOf(prunedPartitions);
       getMetaData(qb);
 
       disableJoinMerge = true;
@@ -9607,6 +9607,7 @@
       disableJoinMerge = false;
       if (reAnalyzeAST) {
         init();
+        prunedPartitions.clear();
         analyzeInternal(ast);
         return;
       }
@@ -11959,7 +11960,7 @@
       optiqJoinCond = RexNodeConverter.convert(m_cluster, joinCondnExprNode, inputRels,
           m_relToHiveRR, m_relToHiveColNameOptiqPosMap, false);
     } else {
-      optiqJoinCond = RexNodeConverter.getAlwaysTruePredicate(m_cluster);
+      optiqJoinCond = m_cluster.getRexBuilder().makeLiteral(true);
     }
 
     // 3. Validate that join condition is legal (i.e no function refering to