Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(revision 926684)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(working copy)
@@ -3128,6 +3128,9 @@
       ctx.setNumFiles(numFiles);
       ctx.setPartnCols(partnColsNoConvert);
       ctx.setTotalFiles(totalFiles);
+      // disable "merge mapfiles" and "merge mapred files".
+      HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPFILES, false);
+      HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPREDFILES, false);
     }
     return input;
   }
Index: ql/src/test/queries/clientpositive/disable_merge_for_bucketing.q
===================================================================
--- ql/src/test/queries/clientpositive/disable_merge_for_bucketing.q	(revision 0)
+++ ql/src/test/queries/clientpositive/disable_merge_for_bucketing.q	(revision 0)
@@ -0,0 +1,21 @@
+set hive.enforce.bucketing = true;
+set hive.exec.reducers.max = 1;
+set hive.merge.mapredfiles=true;
+
+drop table bucket2_1;
+CREATE TABLE bucket2_1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS;
+
+explain extended
+insert overwrite table bucket2_1
+select * from src;
+
+insert overwrite table bucket2_1
+select * from src;
+
+explain
+select * from bucket2_1 tablesample (bucket 1 out of 2) s order by key;
+
+select * from bucket2_1 tablesample (bucket 1 out of 2) s order by key;
+
+
+drop table bucket2_1;
\ No newline at end of file
Index: ql/src/test/results/clientpositive/disable_merge_for_bucketing.q.out
===================================================================
--- ql/src/test/results/clientpositive/disable_merge_for_bucketing.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/disable_merge_for_bucketing.q.out	(revision 0)
@@ -0,0 +1,480 @@
+PREHOOK: query: drop table bucket2_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table bucket2_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE bucket2_1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE bucket2_1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@bucket2_1
+PREHOOK: query: explain extended
+insert overwrite table bucket2_1
+select * from src
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+insert overwrite table bucket2_1
+select * from src
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucket2_1)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: _col0, _col1
+              Reduce Output Operator
+                sort order:
+                Map-reduce partition columns:
+                      expr: UDFToInteger(_col0)
+                      type: int
+                tag: -1
+                value expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: string
+      Needs Tagging: false
+      Path -> Alias:
+        file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/src [src]
+      Path -> Partition:
+        file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/src
+          Partition
+            base file name: src
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types string:string
+              file.inputformat org.apache.hadoop.mapred.TextInputFormat
+              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              location file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/src
+              name src
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              transient_lastDdlTime 1269370923
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.types string:string
+                file.inputformat org.apache.hadoop.mapred.TextInputFormat
+                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                location file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/src
+                name src
+                serialization.ddl struct src { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                transient_lastDdlTime 1269370923
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: src
+            name: src
+      Reduce Operator Tree:
+        Extract
+          Select Operator
+            expressions:
+                  expr: UDFToInteger(_col0)
+                  type: int
+                  expr: _col1
+                  type: string
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 1
+              directory: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-23_12-02-03_982_566371038266932645/10000
+              NumFilesPerFileSink: 2
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    bucket_count 2
+                    bucket_field_name key
+                    columns key,value
+                    columns.types int:string
+                    file.inputformat org.apache.hadoop.mapred.TextInputFormat
+                    file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    location file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/bucket2_1
+                    name bucket2_1
+                    serialization.ddl struct bucket2_1 { i32 key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    transient_lastDdlTime 1269370923
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: bucket2_1
+              TotalFiles: 2
+              MultiFileSpray: true
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          source: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-23_12-02-03_982_566371038266932645/10000
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count 2
+                bucket_field_name key
+                columns key,value
+                columns.types int:string
+                file.inputformat org.apache.hadoop.mapred.TextInputFormat
+                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                location file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/bucket2_1
+                name bucket2_1
+                serialization.ddl struct bucket2_1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                transient_lastDdlTime 1269370923
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: bucket2_1
+          tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-23_12-02-03_982_566371038266932645/10001
+
+
+PREHOOK: query: insert overwrite table bucket2_1
+select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@bucket2_1
+POSTHOOK: query: insert overwrite table bucket2_1
+select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@bucket2_1
+PREHOOK: query: explain
+select * from bucket2_1 tablesample (bucket 1 out of 2) s order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from bucket2_1 tablesample (bucket 1 out of 2) s order by key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF bucket2_1 (TOK_TABLESAMPLE 1 2) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        s
+          TableScan
+            alias: s
+            Filter Operator
+              predicate:
+                  expr: (((hash(key) & 2147483647) % 2) = 0)
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: (((hash(key) & 2147483647) % 2) = 0)
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: key
+                        type: int
+                        expr: value
+                        type: string
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: int
+                    sort order: +
+                    tag: -1
+                    value expressions:
+                          expr: _col0
+                          type: int
+                          expr: _col1
+                          type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select * from bucket2_1 tablesample (bucket 1 out of 2) s order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket2_1
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-23_12-02-38_972_2579884827618647110/10000
+POSTHOOK: query: select * from bucket2_1 tablesample (bucket 1 out of 2) s order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket2_1
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-23_12-02-38_972_2579884827618647110/10000
+0 val_0
+0 val_0
+0 val_0
+2 val_2
+4 val_4
+8 val_8
+10 val_10
+12 val_12
+12 val_12
+18 val_18
+18 val_18
+20 val_20
+24 val_24
+24 val_24
+26 val_26
+26 val_26
+28 val_28
+30 val_30
+34 val_34
+42 val_42
+42 val_42
+44 val_44
+54 val_54
+58 val_58
+58 val_58
+64 val_64
+66 val_66
+70 val_70
+70 val_70
+70 val_70
+72 val_72
+72 val_72
+74 val_74
+76 val_76
+76 val_76
+78 val_78
+80 val_80
+82 val_82
+84 val_84
+84 val_84
+86 val_86
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+96 val_96
+98 val_98
+98 val_98
+100 val_100
+100 val_100
+104 val_104
+104 val_104
+114 val_114
+116 val_116
+118 val_118
+118 val_118
+120 val_120
+120 val_120
+126 val_126
+128 val_128
+128 val_128
+128 val_128
+134 val_134
+134 val_134
+136 val_136
+138 val_138
+138 val_138
+138 val_138
+138 val_138
+146 val_146
+146 val_146
+150 val_150
+152 val_152
+152 val_152
+156 val_156
+158 val_158
+160 val_160
+162 val_162
+164 val_164
+164 val_164
+166 val_166
+168 val_168
+170 val_170
+172 val_172
+172 val_172
+174 val_174
+174 val_174
+176 val_176
+176 val_176
+178 val_178
+180 val_180
+186 val_186
+190 val_190
+192 val_192
+194 val_194
+196 val_196
+200 val_200
+200 val_200
+202 val_202
+208 val_208
+208 val_208
+208 val_208
+214 val_214
+216 val_216
+216 val_216
+218 val_218
+222 val_222
+224 val_224
+224 val_224
+226 val_226
+228 val_228
+230 val_230
+230 val_230
+230 val_230
+230 val_230
+230 val_230
+238 val_238
+238 val_238
+242 val_242
+242 val_242
+244 val_244
+248 val_248
+252 val_252
+256 val_256
+256 val_256
+258 val_258
+260 val_260
+262 val_262
+266 val_266
+272 val_272
+272 val_272
+274 val_274
+278 val_278
+278 val_278
+280 val_280
+280 val_280
+282 val_282
+282 val_282
+284 val_284
+286 val_286
+288 val_288
+288 val_288
+292 val_292
+296 val_296
+298 val_298
+298 val_298
+298 val_298
+302 val_302
+306 val_306
+308 val_308
+310 val_310
+316 val_316
+316 val_316
+316 val_316
+318 val_318
+318 val_318
+318 val_318
+322 val_322
+322 val_322
+332 val_332
+336 val_336
+338 val_338
+342 val_342
+342 val_342
+344 val_344
+344 val_344
+348 val_348
+348 val_348
+348 val_348
+348 val_348
+348 val_348
+356 val_356
+360 val_360
+362 val_362
+364 val_364
+366 val_366
+368 val_368
+374 val_374
+378 val_378
+382 val_382
+382 val_382
+384 val_384
+384 val_384
+384 val_384
+386 val_386
+392 val_392
+394 val_394
+396 val_396
+396 val_396
+396 val_396
+400 val_400
+402 val_402
+404 val_404
+404 val_404
+406 val_406
+406 val_406
+406 val_406
+406 val_406
+414 val_414
+414 val_414
+418 val_418
+424 val_424
+424 val_424
+430 val_430
+430 val_430
+430 val_430
+432 val_432
+436 val_436
+438 val_438
+438 val_438
+438 val_438
+444 val_444
+446 val_446
+448 val_448
+452 val_452
+454 val_454
+454 val_454
+454 val_454
+458 val_458
+458 val_458
+460 val_460
+462 val_462
+462 val_462
+466 val_466
+466 val_466
+466 val_466
+468 val_468
+468 val_468
+468 val_468
+468 val_468
+470 val_470
+472 val_472
+478 val_478
+478 val_478
+480 val_480
+480 val_480
+480 val_480
+482 val_482
+484 val_484
+490 val_490
+492 val_492
+492 val_492
+494 val_494
+496 val_496
+498 val_498
+498 val_498
+498 val_498
+PREHOOK: query: drop table bucket2_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table bucket2_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@bucket2_1
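Note on the SemanticAnalyzer hunk: it forces hive.merge.mapfiles and hive.merge.mapredfiles off whenever the file sink writes a bucketed table, so the merge stage cannot collapse the two files that CLUSTERED BY (key) INTO 2 BUCKETS requires; the test deliberately sets hive.merge.mapredfiles=true to prove the override wins. A minimal sketch of the conf-var behavior, assuming standalone use outside SemanticAnalyzer (the class and main method below are illustrative only, not part of the patch):

    import org.apache.hadoop.hive.conf.HiveConf;

    public class DisableMergeSketch {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // A session may have requested merging, as the test does with
        // "set hive.merge.mapredfiles=true;".
        HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPREDFILES, true);
        // The patched code path overrides both flags for bucketed-table sinks:
        HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPFILES, false);
        HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPREDFILES, false);
        // Both now read back false, so no merge task should be generated.
        System.out.println(HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPFILES));
        System.out.println(HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPREDFILES));
      }
    }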
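The sampling predicate in the second plan, (((hash(key) & 2147483647) % 2) = 0), is how tablesample (bucket 1 out of 2) selects rows: for an int column Hive's hash is the value itself, the & 2147483647 (Integer.MAX_VALUE) clears the sign bit so the modulus is non-negative, and % 2 = 0 picks bucket 1, which is why every key in the result is even. A sketch of that arithmetic (class and method names are illustrative, not from the patch):

    public class BucketPredicateSketch {
      // Mirrors the plan's (((hash(key) & 2147483647) % 2) = 0) for int keys.
      static boolean inBucketOneOfTwo(int key) {
        int hash = key; // Hive's hash of an int column is the int value itself
        return ((hash & Integer.MAX_VALUE) % 2) == 0;
      }

      public static void main(String[] args) {
        System.out.println(inBucketOneOfTwo(0));  // true  -> appears in the results
        System.out.println(inBucketOneOfTwo(5));  // false -> filtered out
        System.out.println(inBucketOneOfTwo(86)); // true  -> appears in the results
      }
    }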