diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
index 6a4a360..4fa5f2a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
@@ -217,7 +217,7 @@ public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr,
           LOG.info(ErrorMsg.INVALID_JDO_FILTER_EXPRESSION.getMsg("by condition '" + message + "'"));
           pruneBySequentialScan(tab, true_parts, unkn_parts, denied_parts,
-              prunerExpr, rowObjectInspector);
+              prunerExpr, rowObjectInspector, conf);
         }
       }
     }
@@ -300,10 +300,11 @@ static private void pruneByPushDown(Table tab, Set true_parts, String
    * @param denied_parts pruned out partitions.
    * @param prunerExpr the SQL predicate that involves partition columns.
    * @param rowObjectInspector object inspector used by the evaluator
+   * @param conf Hive configuration object; cannot be null.
    * @throws Exception
    */
   static private void pruneBySequentialScan(Table tab, Set true_parts, Set unkn_parts,
-      Set denied_parts, ExprNodeDesc prunerExpr, StructObjectInspector rowObjectInspector)
+      Set denied_parts, ExprNodeDesc prunerExpr, StructObjectInspector rowObjectInspector, HiveConf conf)
       throws Exception {
 
     List trueNames = null;
@@ -320,6 +321,7 @@ static private void pruneBySequentialScan(Table tab, Set true_parts,
     List partCols = new ArrayList(pCols.size());
     List values = new ArrayList(pCols.size());
     Object[] objectWithPart = new Object[2];
+    String defaultPartitionName = conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME);
 
     for (FieldSchema pCol : pCols) {
       partCols.add(pCol.getName());
@@ -344,11 +346,17 @@ static private void pruneBySequentialScan(Table tab, Set true_parts,
       Boolean r = (Boolean) PartExprEvalUtils.evaluateExprOnPart(handle, objectWithPart);
       if (r == null) {
-        if (unknNames == null) {
-          unknNames = new LinkedList();
+        // Reject default partitions when we cannot determine whether to include them.
+        // Note that the predicate here contains only the partition-column parts of the original predicate.
+        if (values.contains(defaultPartitionName)) {
+          LOG.debug("skipping default/bad partition: " + partName);
+        } else {
+          if (unknNames == null) {
+            unknNames = new LinkedList();
+          }
+          unknNames.add(partName);
+          LOG.debug("retained unknown partition: " + partName);
         }
-        unknNames.add(partName);
-        LOG.debug("retained unknown partition: " + partName);
       } else if (Boolean.TRUE.equals(r)) {
         if (trueNames == null) {
           trueNames = new LinkedList();
diff --git ql/src/test/queries/clientpositive/dynamic_partition_skip_default.q ql/src/test/queries/clientpositive/dynamic_partition_skip_default.q
new file mode 100644
index 0000000..397a220
--- /dev/null
+++ ql/src/test/queries/clientpositive/dynamic_partition_skip_default.q
@@ -0,0 +1,19 @@
+create table dynamic_part_table(intcol int) partitioned by (partcol1 int, partcol2 int);
+
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+insert into table dynamic_part_table partition(partcol1, partcol2) select 1, 1, 1 from src where key=150;
+
+insert into table dynamic_part_table partition(partcol1, partcol2) select 1, NULL, 1 from src where key=150;
+
+insert into table dynamic_part_table partition(partcol1, partcol2) select 1, 1, NULL from src where key=150;
+
+insert into table dynamic_part_table partition(partcol1, partcol2) select 1, NULL, NULL from src where key=150;
+
+explain extended select intcol from dynamic_part_table where partcol1=1 and partcol2=1;
+
+set hive.exec.dynamic.partition.mode=strict;
+
+explain extended select intcol from dynamic_part_table where partcol1=1 and partcol2=1;
+
+explain extended select intcol from dynamic_part_table where (partcol1=1 and partcol2=1) or (partcol1=1 and partcol2='__HIVE_DEFAULT_PARTITION__');
diff --git ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out
new file mode 100644
index 0000000..68a4f80
--- /dev/null
+++ ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out
@@ -0,0 +1,406 @@
+PREHOOK: query: create table dynamic_part_table(intcol int) partitioned by (partcol1 int, partcol2 int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table dynamic_part_table(intcol int) partitioned by (partcol1 int, partcol2 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dynamic_part_table
+PREHOOK: query: insert into table dynamic_part_table partition(partcol1, partcol2) select 1, 1, 1 from src where key=150
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dynamic_part_table
+POSTHOOK: query: insert into table dynamic_part_table partition(partcol1, partcol2) select 1, 1, 1 from src where key=150
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dynamic_part_table@partcol1=1/partcol2=1
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=1,partcol2=1).intcol SIMPLE []
+PREHOOK: query: insert into table dynamic_part_table partition(partcol1, partcol2) select 1, NULL, 1 from src where key=150
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dynamic_part_table
+POSTHOOK: query: insert into table dynamic_part_table partition(partcol1, partcol2) select 1, NULL, 1 from src where key=150
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dynamic_part_table@partcol1=__HIVE_DEFAULT_PARTITION__/partcol2=1
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=1,partcol2=1).intcol SIMPLE []
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=__HIVE_DEFAULT_PARTITION__,partcol2=1).intcol SIMPLE []
+PREHOOK: query: insert into table dynamic_part_table partition(partcol1, partcol2) select 1, 1, NULL from src where key=150
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dynamic_part_table
+POSTHOOK: query: insert into table dynamic_part_table partition(partcol1, partcol2) select 1, 1, NULL from src where key=150
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dynamic_part_table@partcol1=1/partcol2=__HIVE_DEFAULT_PARTITION__
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=1,partcol2=1).intcol SIMPLE []
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=1,partcol2=__HIVE_DEFAULT_PARTITION__).intcol SIMPLE []
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=__HIVE_DEFAULT_PARTITION__,partcol2=1).intcol SIMPLE []
+PREHOOK: query: insert into table dynamic_part_table partition(partcol1, partcol2) select 1, NULL, NULL from src where key=150
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dynamic_part_table
+POSTHOOK: query: insert into table dynamic_part_table partition(partcol1, partcol2) select 1, NULL, NULL from src where key=150
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dynamic_part_table@partcol1=__HIVE_DEFAULT_PARTITION__/partcol2=__HIVE_DEFAULT_PARTITION__
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=1,partcol2=1).intcol SIMPLE []
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=1,partcol2=__HIVE_DEFAULT_PARTITION__).intcol SIMPLE []
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=__HIVE_DEFAULT_PARTITION__,partcol2=1).intcol SIMPLE []
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=__HIVE_DEFAULT_PARTITION__,partcol2=__HIVE_DEFAULT_PARTITION__).intcol SIMPLE []
+PREHOOK: query: explain extended select intcol from dynamic_part_table where partcol1=1 and partcol2=1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select intcol from dynamic_part_table where partcol1=1 and partcol2=1
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=1,partcol2=1).intcol SIMPLE []
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=1,partcol2=__HIVE_DEFAULT_PARTITION__).intcol SIMPLE []
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=__HIVE_DEFAULT_PARTITION__,partcol2=1).intcol SIMPLE []
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=__HIVE_DEFAULT_PARTITION__,partcol2=__HIVE_DEFAULT_PARTITION__).intcol SIMPLE []
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dynamic_part_table))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL intcol))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL partcol1) 1) (= (TOK_TABLE_OR_COL partcol2) 1)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        dynamic_part_table
+          TableScan
+            alias: dynamic_part_table
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: intcol
+                    type: int
+              outputColumnNames: _col0
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      columns _col0
+                      columns.types int
+                      escape.delim \
+                      hive.serialization.extend.nesting.levels true
+                      serialization.format 1
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+      Needs Tagging: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: partcol2=1
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              partcol1 1
+              partcol2 1
+            properties:
+              bucket_count -1
+              columns intcol
+              columns.types int
+#### A masked pattern was here ####
+              name default.dynamic_part_table
+              numFiles 1
+              numRows 1
+              partition_columns partcol1/partcol2
+              rawDataSize 1
+              serialization.ddl struct dynamic_part_table { i32 intcol}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 2
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns intcol
+                columns.types int
+#### A masked pattern was here ####
+                name default.dynamic_part_table
+                numFiles 4
+                numPartitions 4
+                numRows 4
+                partition_columns partcol1/partcol2
+                rawDataSize 4
+                serialization.ddl struct dynamic_part_table { i32 intcol}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 8
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dynamic_part_table
+            name: default.dynamic_part_table
+      Truncated Path -> Alias:
+        /dynamic_part_table/partcol1=1/partcol2=1 [dynamic_part_table]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain extended select intcol from dynamic_part_table where partcol1=1 and partcol2=1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select intcol from dynamic_part_table where partcol1=1 and partcol2=1
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=1,partcol2=1).intcol SIMPLE []
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=1,partcol2=__HIVE_DEFAULT_PARTITION__).intcol SIMPLE []
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=__HIVE_DEFAULT_PARTITION__,partcol2=1).intcol SIMPLE []
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=__HIVE_DEFAULT_PARTITION__,partcol2=__HIVE_DEFAULT_PARTITION__).intcol SIMPLE []
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dynamic_part_table))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL intcol))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL partcol1) 1) (= (TOK_TABLE_OR_COL partcol2) 1)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        dynamic_part_table
+          TableScan
+            alias: dynamic_part_table
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: intcol
+                    type: int
+              outputColumnNames: _col0
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      columns _col0
+                      columns.types int
+                      escape.delim \
+                      hive.serialization.extend.nesting.levels true
+                      serialization.format 1
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+      Needs Tagging: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: partcol2=1
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              partcol1 1
+              partcol2 1
+            properties:
+              bucket_count -1
+              columns intcol
+              columns.types int
+#### A masked pattern was here ####
+              name default.dynamic_part_table
+              numFiles 1
+              numRows 1
+              partition_columns partcol1/partcol2
+              rawDataSize 1
+              serialization.ddl struct dynamic_part_table { i32 intcol}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 2
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns intcol
+                columns.types int
+#### A masked pattern was here ####
+                name default.dynamic_part_table
+                numFiles 4
+                numPartitions 4
+                numRows 4
+                partition_columns partcol1/partcol2
+                rawDataSize 4
+                serialization.ddl struct dynamic_part_table { i32 intcol}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 8
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dynamic_part_table
+            name: default.dynamic_part_table
+      Truncated Path -> Alias:
+        /dynamic_part_table/partcol1=1/partcol2=1 [dynamic_part_table]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain extended select intcol from dynamic_part_table where (partcol1=1 and partcol2=1) or (partcol1=1 and partcol2='__HIVE_DEFAULT_PARTITION__')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select intcol from dynamic_part_table where (partcol1=1 and partcol2=1) or (partcol1=1 and partcol2='__HIVE_DEFAULT_PARTITION__')
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=1,partcol2=1).intcol SIMPLE []
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=1,partcol2=__HIVE_DEFAULT_PARTITION__).intcol SIMPLE []
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=__HIVE_DEFAULT_PARTITION__,partcol2=1).intcol SIMPLE []
+POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=__HIVE_DEFAULT_PARTITION__,partcol2=__HIVE_DEFAULT_PARTITION__).intcol SIMPLE []
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dynamic_part_table))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL intcol))) (TOK_WHERE (or (and (= (TOK_TABLE_OR_COL partcol1) 1) (= (TOK_TABLE_OR_COL partcol2) 1)) (and (= (TOK_TABLE_OR_COL partcol1) 1) (= (TOK_TABLE_OR_COL partcol2) '__HIVE_DEFAULT_PARTITION__'))))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        dynamic_part_table
+          TableScan
+            alias: dynamic_part_table
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: intcol
+                    type: int
+              outputColumnNames: _col0
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      columns _col0
+                      columns.types int
+                      escape.delim \
+                      hive.serialization.extend.nesting.levels true
+                      serialization.format 1
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+      Needs Tagging: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: partcol2=1
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              partcol1 1
+              partcol2 1
+            properties:
+              bucket_count -1
+              columns intcol
+              columns.types int
+#### A masked pattern was here ####
+              name default.dynamic_part_table
+              numFiles 1
+              numRows 1
+              partition_columns partcol1/partcol2
+              rawDataSize 1
+              serialization.ddl struct dynamic_part_table { i32 intcol}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 2
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns intcol
+                columns.types int
+#### A masked pattern was here ####
+                name default.dynamic_part_table
+                numFiles 4
+                numPartitions 4
+                numRows 4
+                partition_columns partcol1/partcol2
+                rawDataSize 4
+                serialization.ddl struct dynamic_part_table { i32 intcol}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 8
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dynamic_part_table
+            name: default.dynamic_part_table
+#### A masked pattern was here ####
+          Partition
+            base file name: partcol2=__HIVE_DEFAULT_PARTITION__
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              partcol1 1
+              partcol2 __HIVE_DEFAULT_PARTITION__
+            properties:
+              bucket_count -1
+              columns intcol
+              columns.types int
+#### A masked pattern was here ####
+              name default.dynamic_part_table
+              numFiles 1
+              numRows 1
+              partition_columns partcol1/partcol2
+              rawDataSize 1
+              serialization.ddl struct dynamic_part_table { i32 intcol}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 2
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns intcol
+                columns.types int
+#### A masked pattern was here ####
+                name default.dynamic_part_table
+                numFiles 4
+                numPartitions 4
+                numRows 4
+                partition_columns partcol1/partcol2
+                rawDataSize 4
+                serialization.ddl struct dynamic_part_table { i32 intcol}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 8
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dynamic_part_table
+            name: default.dynamic_part_table
+      Truncated Path -> Alias:
+        /dynamic_part_table/partcol1=1/partcol2=1 [dynamic_part_table]
+        /dynamic_part_table/partcol1=1/partcol2=__HIVE_DEFAULT_PARTITION__ [dynamic_part_table]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
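
For readers skimming the patch, a minimal standalone sketch of the decision the new branch in pruneBySequentialScan implements: when the partition predicate evaluates to null (undecidable), the partition is normally kept as "unknown", but is now skipped outright if any of its values is the default-partition marker. This is not part of the patch; the class, enum, and method names below are illustrative, not Hive APIs.

import java.util.Arrays;
import java.util.List;

public class DefaultPartitionPruningSketch {

  enum Decision { RETAIN, UNKNOWN, SKIP }

  // evalResult is the partition predicate evaluated against this partition's
  // values: TRUE, FALSE, or null when it cannot be decided (for example,
  // comparing the non-numeric default value against an int literal).
  static Decision decide(Boolean evalResult, List<String> values,
                         String defaultPartitionName) {
    if (evalResult == null) {
      // The patch's new behavior: an undecidable predicate no longer keeps
      // a default ("bad") partition around as unknown; it is rejected.
      return values.contains(defaultPartitionName) ? Decision.SKIP
                                                   : Decision.UNKNOWN;
    }
    return Boolean.TRUE.equals(evalResult) ? Decision.RETAIN : Decision.SKIP;
  }

  public static void main(String[] args) {
    final String def = "__HIVE_DEFAULT_PARTITION__";
    // partcol1=1/partcol2=1, predicate partcol2=1 decides TRUE -> RETAIN
    System.out.println(decide(Boolean.TRUE, Arrays.asList("1", "1"), def));
    // partcol1=1/partcol2=__HIVE_DEFAULT_PARTITION__, undecidable -> SKIP
    System.out.println(decide(null, Arrays.asList("1", def), def));
    // ordinary partition with an undecidable predicate -> UNKNOWN (kept)
    System.out.println(decide(null, Arrays.asList("1", "2"), def));
  }
}

This matches the golden output above: with the predicate partcol1=1 and partcol2=1, only partcol1=1/partcol2=1 survives pruning, while the last query shows that explicitly matching '__HIVE_DEFAULT_PARTITION__' still selects the default partition.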