Index: ql/src/test/results/clientpositive/rcfile_createas1.q.out =================================================================== --- ql/src/test/results/clientpositive/rcfile_createas1.q.out (revision 1177363) +++ ql/src/test/results/clientpositive/rcfile_createas1.q.out (working copy) @@ -60,11 +60,12 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2 + Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3 + Stage-6 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-6 Stage-3 - Stage-0 depends on stages: Stage-3, Stage-2 - Stage-5 depends on stages: Stage-0 - Stage-2 STAGE PLANS: Stage: Stage-1 @@ -89,22 +90,22 @@ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - Stage: Stage-4 + Stage: Stage-5 Conditional Operator - Stage: Stage-3 + Stage: Stage-4 Move Operator files: hdfs directory: true - destination: pfile:/data/users/franklin/hive-block-merge/build/ql/scratchdir/hive_2011-06-09_16-06-50_525_4856448737963146161/-ext-10001 + destination: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_10-29-52_870_2014775106188241531/-ext-10001 Stage: Stage-0 Move Operator files: hdfs directory: true - destination: pfile:/data/users/franklin/hive-block-merge/build/ql/test/data/warehouse/rcfile_createas1b + destination: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/rcfile_createas1b - Stage: Stage-5 + Stage: Stage-6 Create Table Operator: Create Table columns: key int, value string, part int @@ -117,6 +118,9 @@ isExternal: false Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 Block level merge @@ -146,7 +150,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@rcfile_createas1a@ds=1 PREHOOK: Input: default@rcfile_createas1a@ds=2 -PREHOOK: Output: 
file:/tmp/franklin/hive_2011-06-09_16-06-54_053_5965587433920310393/-mr-10000 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-29_10-30-02_246_6326040558544119160/-mr-10000 POSTHOOK: query: SELECT SUM(HASH(c)) FROM ( SELECT TRANSFORM(key, value) USING 'tr \t _' AS (c) FROM rcfile_createas1a @@ -154,7 +158,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@rcfile_createas1a@ds=1 POSTHOOK: Input: default@rcfile_createas1a@ds=2 -POSTHOOK: Output: file:/tmp/franklin/hive_2011-06-09_16-06-54_053_5965587433920310393/-mr-10000 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-29_10-30-02_246_6326040558544119160/-mr-10000 POSTHOOK: Lineage: rcfile_createas1a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_createas1a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_createas1a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] @@ -166,14 +170,14 @@ ) t PREHOOK: type: QUERY PREHOOK: Input: default@rcfile_createas1b -PREHOOK: Output: file:/tmp/franklin/hive_2011-06-09_16-06-57_460_3734087433150140544/-mr-10000 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-29_10-30-08_244_8991470352975252351/-mr-10000 POSTHOOK: query: SELECT SUM(HASH(c)) FROM ( SELECT TRANSFORM(key, value) USING 'tr \t _' AS (c) FROM rcfile_createas1b ) t POSTHOOK: type: QUERY POSTHOOK: Input: default@rcfile_createas1b -POSTHOOK: Output: file:/tmp/franklin/hive_2011-06-09_16-06-57_460_3734087433150140544/-mr-10000 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-29_10-30-08_244_8991470352975252351/-mr-10000 POSTHOOK: Lineage: rcfile_createas1a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, 
comment:default), ] POSTHOOK: Lineage: rcfile_createas1a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_createas1a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] Index: ql/src/test/results/clientpositive/smb_mapjoin9.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin9.q.out (revision 1177363) +++ ql/src/test/results/clientpositive/smb_mapjoin9.q.out (working copy) @@ -54,7 +54,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -114,9 +115,9 @@ Move Operator files: hdfs directory: true - destination: pfile:/data/users/charleschen/hive-trunk/build/ql/test/data/warehouse/smb_mapjoin9_results + destination: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/smb_mapjoin9_results - Stage: Stage-3 + Stage: Stage-4 Create Table Operator: Create Table columns: k1 int, value string, ds string, k2 int @@ -127,7 +128,10 @@ name: smb_mapjoin9_results isExternal: false + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: create table smb_mapjoin9_results as SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2 FROM hive_test_smb_bucket1 a JOIN Index: ql/src/test/results/clientpositive/ctas.q.out =================================================================== --- ql/src/test/results/clientpositive/ctas.q.out (revision 1177363) +++ ql/src/test/results/clientpositive/ctas.q.out (working copy) @@ -6,11 +6,11 @@ PREHOOK: query: select * from nzhang_Tmp PREHOOK: type: QUERY PREHOOK: Input: default@nzhang_tmp -PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-44-51_322_2848708186205887611/-mr-10000 +PREHOOK: Output: 
file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-50-54_750_4326131288455090667/-mr-10000 POSTHOOK: query: select * from nzhang_Tmp POSTHOOK: type: QUERY POSTHOOK: Input: default@nzhang_tmp -POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-44-51_322_2848708186205887611/-mr-10000 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-50-54_750_4326131288455090667/-mr-10000 PREHOOK: query: explain create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 PREHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: query: explain create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 @@ -22,7 +22,8 @@ Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -64,7 +65,7 @@ Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/tmp/sdong/hive_2011-02-10_01-44-51_470_2994705028366987051/-mr-10002 + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-50-54_965_7902820563655493628/-mr-10002 Reduce Output Operator key expressions: expr: _col0 @@ -92,9 +93,9 @@ Move Operator files: hdfs directory: true - destination: pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/nzhang_ctas1 + destination: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/nzhang_ctas1 - Stage: Stage-3 + Stage: Stage-4 Create Table Operator: Create Table columns: k string, value string @@ -105,7 +106,10 @@ name: nzhang_CTAS1 isExternal: false + Stage: Stage-3 + Stats-Aggr Operator + PREHOOK: query: create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src @@ -116,11 +120,11 @@ PREHOOK: query: select * from 
nzhang_CTAS1 PREHOOK: type: QUERY PREHOOK: Input: default@nzhang_ctas1 -PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-44-58_596_3850188449580676786/-mr-10000 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-51-08_506_1265530398126489693/-mr-10000 POSTHOOK: query: select * from nzhang_CTAS1 POSTHOOK: type: QUERY POSTHOOK: Input: default@nzhang_ctas1 -POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-44-58_596_3850188449580676786/-mr-10000 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-51-08_506_1265530398126489693/-mr-10000 0 val_0 0 val_0 0 val_0 @@ -142,7 +146,8 @@ Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -184,7 +189,7 @@ Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/tmp/sdong/hive_2011-02-10_01-44-58_860_3982645483317411637/-mr-10002 + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-51-08_679_7364781728359111056/-mr-10002 Reduce Output Operator key expressions: expr: _col0 @@ -212,9 +217,9 @@ Move Operator files: hdfs directory: true - destination: pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/nzhang_ctas2 + destination: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/nzhang_ctas2 - Stage: Stage-3 + Stage: Stage-4 Create Table Operator: Create Table columns: key string, value string @@ -225,7 +230,10 @@ name: nzhang_ctas2 isExternal: false + Stage: Stage-3 + Stats-Aggr Operator + PREHOOK: query: create table nzhang_ctas2 as select * from src sort by key, value limit 10 PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src @@ -236,11 +244,11 @@ PREHOOK: query: select * from nzhang_ctas2 PREHOOK: type: QUERY PREHOOK: Input: 
default@nzhang_ctas2 -PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-45-05_578_7349012551226510377/-mr-10000 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-51-19_863_4047035371422100488/-mr-10000 POSTHOOK: query: select * from nzhang_ctas2 POSTHOOK: type: QUERY POSTHOOK: Input: default@nzhang_ctas2 -POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-45-05_578_7349012551226510377/-mr-10000 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-51-19_863_4047035371422100488/-mr-10000 0 val_0 0 val_0 0 val_0 @@ -262,7 +270,8 @@ Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -304,7 +313,7 @@ Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/tmp/sdong/hive_2011-02-10_01-45-05_893_3271637980302783261/-mr-10002 + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-51-20_014_4011472844743721657/-mr-10002 Reduce Output Operator key expressions: expr: _col0 @@ -332,9 +341,9 @@ Move Operator files: hdfs directory: true - destination: pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/nzhang_ctas3 + destination: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/nzhang_ctas3 - Stage: Stage-3 + Stage: Stage-4 Create Table Operator: Create Table columns: half_key double, conb string @@ -346,7 +355,10 @@ name: nzhang_ctas3 isExternal: false + Stage: Stage-3 + Stats-Aggr Operator + PREHOOK: query: create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src @@ -357,11 
+369,11 @@ PREHOOK: query: select * from nzhang_ctas3 PREHOOK: type: QUERY PREHOOK: Input: default@nzhang_ctas3 -PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-45-12_580_9113217289697940221/-mr-10000 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-51-31_721_72388894515556153/-mr-10000 POSTHOOK: query: select * from nzhang_ctas3 POSTHOOK: type: QUERY POSTHOOK: Input: default@nzhang_ctas3 -POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-45-12_580_9113217289697940221/-mr-10000 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-51-31_721_72388894515556153/-mr-10000 0.0 val_0_con 0.0 val_0_con 0.0 val_0_con @@ -390,11 +402,11 @@ PREHOOK: query: select * from nzhang_ctas3 PREHOOK: type: QUERY PREHOOK: Input: default@nzhang_ctas3 -PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-45-12_947_7015621330413441447/-mr-10000 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-51-31_945_8559439354987147912/-mr-10000 POSTHOOK: query: select * from nzhang_ctas3 POSTHOOK: type: QUERY POSTHOOK: Input: default@nzhang_ctas3 -POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-45-12_947_7015621330413441447/-mr-10000 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-51-31_945_8559439354987147912/-mr-10000 0.0 val_0_con 0.0 val_0_con 0.0 val_0_con @@ -416,7 +428,8 @@ Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -458,7 +471,7 @@ Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/tmp/sdong/hive_2011-02-10_01-45-13_334_919564477125108638/-mr-10002 + 
file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-51-32_103_9202211043433876771/-mr-10002 Reduce Output Operator key expressions: expr: _col0 @@ -486,9 +499,9 @@ Move Operator files: hdfs directory: true - destination: pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/nzhang_ctas4 + destination: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/nzhang_ctas4 - Stage: Stage-3 + Stage: Stage-4 Create Table Operator: Create Table columns: key string, value string @@ -500,7 +513,10 @@ name: nzhang_ctas4 isExternal: false + Stage: Stage-3 + Stats-Aggr Operator + PREHOOK: query: create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10 PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src @@ -511,11 +527,11 @@ PREHOOK: query: select * from nzhang_ctas4 PREHOOK: type: QUERY PREHOOK: Input: default@nzhang_ctas4 -PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-45-20_050_1736075504443010216/-mr-10000 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-51-43_342_1995127505315715419/-mr-10000 POSTHOOK: query: select * from nzhang_ctas4 POSTHOOK: type: QUERY POSTHOOK: Input: default@nzhang_ctas4 -POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-45-20_050_1736075504443010216/-mr-10000 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-51-43_342_1995127505315715419/-mr-10000 0 val_0 0 val_0 0 val_0 @@ -537,7 +553,8 @@ Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -569,9 +586,9 @@ type: string Needs Tagging: false Path -> Alias: - 
pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src [src] + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/src [src] Path -> Partition: - pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/src Partition base file name: src input format: org.apache.hadoop.mapred.TextInputFormat @@ -582,12 +599,12 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/src name default.src serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1297328964 + transient_lastDdlTime 1317257453 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -598,12 +615,12 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/src name default.src serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1297328964 + transient_lastDdlTime 1317257453 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src name: default.src @@ -613,7 +630,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: 
file:/tmp/sdong/hive_2011-02-10_01-45-20_305_5281789596105491506/-mr-10002 + directory: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-51-43_495_1405662147563997042/-mr-10002 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -629,7 +646,7 @@ Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/tmp/sdong/hive_2011-02-10_01-45-20_305_5281789596105491506/-mr-10002 + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-51-43_495_1405662147563997042/-mr-10002 Reduce Output Operator key expressions: expr: _col0 @@ -645,9 +662,9 @@ type: string Needs Tagging: false Path -> Alias: - file:/tmp/sdong/hive_2011-02-10_01-45-20_305_5281789596105491506/-mr-10002 [file:/tmp/sdong/hive_2011-02-10_01-45-20_305_5281789596105491506/-mr-10002] + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-51-43_495_1405662147563997042/-mr-10002 [file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-51-43_495_1405662147563997042/-mr-10002] Path -> Partition: - file:/tmp/sdong/hive_2011-02-10_01-45-20_305_5281789596105491506/-mr-10002 + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-28_17-51-43_495_1405662147563997042/-mr-10002 Partition base file name: -mr-10002 input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -669,9 +686,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/sdong/www/open-source-hive1/build/ql/scratchdir/hive_2011-02-10_01-45-20_305_5281789596105491506/-ext-10001 + directory: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-28_17-51-43_495_1405662147563997042/-ext-10001 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/sdong/www/open-source-hive1/build/ql/scratchdir/hive_2011-02-10_01-45-20_305_5281789596105491506/-ext-10001/ + Stats 
Publishing Key Prefix: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-28_17-51-43_495_1405662147563997042/-ext-10001/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -683,17 +700,17 @@ serialization.format , TotalFiles: 1 - GatherStats: false + GatherStats: true MultiFileSpray: false Stage: Stage-0 Move Operator files: hdfs directory: true - source: pfile:/data/users/sdong/www/open-source-hive1/build/ql/scratchdir/hive_2011-02-10_01-45-20_305_5281789596105491506/-ext-10001 - destination: pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/nzhang_ctas5 + source: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-28_17-51-43_495_1405662147563997042/-ext-10001 + destination: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/nzhang_ctas5 - Stage: Stage-3 + Stage: Stage-4 Create Table Operator: Create Table columns: key string, value string @@ -707,7 +724,11 @@ name: nzhang_ctas5 isExternal: false + Stage: Stage-3 + Stats-Aggr Operator + Stats Aggregation Key Prefix: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-28_17-51-43_495_1405662147563997042/-ext-10001/ + PREHOOK: query: create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10 PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src Index: ql/src/test/results/clientpositive/merge3.q.out =================================================================== --- ql/src/test/results/clientpositive/merge3.q.out (revision 1177363) +++ ql/src/test/results/clientpositive/merge3.q.out (working copy) @@ -54,11 +54,12 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2 + Stage-5 depends on 
stages: Stage-1 , consists of Stage-4, Stage-3 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3 + Stage-6 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-6 Stage-3 - Stage-0 depends on stages: Stage-3, Stage-2 - Stage-5 depends on stages: Stage-0 - Stage-2 STAGE PLANS: Stage: Stage-1 @@ -78,9 +79,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-44_376_1447687532118180204/-ext-10002 + directory: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-34_166_4302165235844983728/-ext-10002 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-44_376_1447687532118180204/-ext-10001/ + Stats Publishing Key Prefix: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-34_166_4302165235844983728/-ext-10001/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -89,13 +90,13 @@ columns.types string:string serialization.format 1 TotalFiles: 1 - GatherStats: false + GatherStats: true MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src [merge_src] + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src [merge_src] Path -> Partition: - pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src Partition base file name: merge_src input format: org.apache.hadoop.mapred.TextInputFormat @@ -106,12 +107,17 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location 
pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src name default.merge_src + numFiles 4 + numPartitions 0 + numRows 2000 + rawDataSize 21248 serialization.ddl struct merge_src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1306983697 + totalSize 23248 + transient_lastDdlTime 1317315386 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -122,34 +128,39 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src name default.merge_src + numFiles 4 + numPartitions 0 + numRows 2000 + rawDataSize 21248 serialization.ddl struct merge_src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1306983697 + totalSize 23248 + transient_lastDdlTime 1317315386 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_src name: default.merge_src - Stage: Stage-4 + Stage: Stage-5 Conditional Operator - Stage: Stage-3 + Stage: Stage-4 Move Operator files: hdfs directory: true - source: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-44_376_1447687532118180204/-ext-10002 - destination: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-44_376_1447687532118180204/-ext-10001 + source: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-34_166_4302165235844983728/-ext-10002 + destination: 
pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-34_166_4302165235844983728/-ext-10001 Stage: Stage-0 Move Operator files: hdfs directory: true - source: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-44_376_1447687532118180204/-ext-10001 - destination: pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src2 + source: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-34_166_4302165235844983728/-ext-10001 + destination: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src2 - Stage: Stage-5 + Stage: Stage-6 Create Table Operator: Create Table columns: key string, value string @@ -161,13 +172,17 @@ isExternal: false Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-34_166_4302165235844983728/-ext-10001/ + + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-44_376_1447687532118180204/-ext-10002 + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-34_166_4302165235844983728/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 - directory: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-44_376_1447687532118180204/-ext-10001 + directory: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-34_166_4302165235844983728/-ext-10001 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -181,9 +196,9 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-44_376_1447687532118180204/-ext-10002 
[pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-44_376_1447687532118180204/-ext-10002] + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-34_166_4302165235844983728/-ext-10002 [pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-34_166_4302165235844983728/-ext-10002] Path -> Partition: - pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-44_376_1447687532118180204/-ext-10002 + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-34_166_4302165235844983728/-ext-10002 Partition base file name: -ext-10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -217,11 +232,11 @@ PREHOOK: query: select * from merge_src2 PREHOOK: type: QUERY PREHOOK: Input: default@merge_src2 -PREHOOK: Output: file:/tmp/tomasz/hive_2011-06-01_20-01-50_658_5738218994707061399/-mr-10000 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-29_09-56-44_226_2834757649700687961/-mr-10000 POSTHOOK: query: select * from merge_src2 POSTHOOK: type: QUERY POSTHOOK: Input: default@merge_src2 -POSTHOOK: Output: file:/tmp/tomasz/hive_2011-06-01_20-01-50_658_5738218994707061399/-mr-10000 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-29_09-56-44_226_2834757649700687961/-mr-10000 POSTHOOK: Lineage: merge_src_part PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: merge_src_part PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: merge_src_part PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] @@ -2280,9 +2295,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: 
pfile:/home/amarsri/workspace/hive/build/ql/scratchdir/hive_2011-07-11_02-35-46_067_3066864399805575043/-ext-10002 + directory: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-44_554_5764124493152347521/-ext-10002 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/home/amarsri/workspace/hive/build/ql/scratchdir/hive_2011-07-11_02-35-46_067_3066864399805575043/-ext-10000/ + Stats Publishing Key Prefix: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-44_554_5764124493152347521/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2292,13 +2307,13 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/home/amarsri/workspace/hive/build/ql/test/data/warehouse/merge_src_part2 + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part2 name default.merge_src_part2 partition_columns ds serialization.ddl struct merge_src_part2 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1310376946 + transient_lastDdlTime 1317315404 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_src_part2 TotalFiles: 1 @@ -2306,10 +2321,10 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-08 [merge_src_part] - pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-09 [merge_src_part] + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-08 [merge_src_part] + 
pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-09 [merge_src_part] Path -> Partition: - pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-08 + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-08 Partition base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat @@ -2322,18 +2337,13 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-08 + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-08 name default.merge_src_part - numFiles 2 - numPartitions 2 - numRows 1000 partition_columns ds - rawDataSize 10624 serialization.ddl struct merge_src_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 11624 - transient_lastDdlTime 1306983704 + transient_lastDdlTime 1317315393 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -2344,22 +2354,17 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part name default.merge_src_part - numFiles 4 - numPartitions 2 - numRows 2000 partition_columns ds - rawDataSize 21248 serialization.ddl struct merge_src_part { string key, string value} serialization.format 1 serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 23248 - transient_lastDdlTime 1306983704 + transient_lastDdlTime 1317315386 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_src_part name: default.merge_src_part - pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-09 + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-09 Partition base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat @@ -2372,18 +2377,13 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-09 + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-09 name default.merge_src_part - numFiles 2 - numPartitions 2 - numRows 1000 partition_columns ds - rawDataSize 10624 serialization.ddl struct merge_src_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 11624 - transient_lastDdlTime 1306983704 + transient_lastDdlTime 1317315394 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -2394,18 +2394,13 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part name default.merge_src_part - numFiles 4 - numPartitions 2 - numRows 2000 partition_columns ds - rawDataSize 21248 serialization.ddl struct merge_src_part { string 
key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 23248 - transient_lastDdlTime 1306983704 + transient_lastDdlTime 1317315386 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_src_part name: default.merge_src_part @@ -2417,8 +2412,8 @@ Move Operator files: hdfs directory: true - source: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-51_051_6120737160044680268/-ext-10002 - destination: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-51_051_6120737160044680268/-ext-10000 + source: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-44_554_5764124493152347521/-ext-10002 + destination: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-44_554_5764124493152347521/-ext-10000 Stage: Stage-0 Move Operator @@ -2426,7 +2421,7 @@ partition: ds replace: true - source: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-51_051_6120737160044680268/-ext-10000 + source: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-44_554_5764124493152347521/-ext-10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2436,29 +2431,29 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part2 + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part2 name default.merge_src_part2 partition_columns ds serialization.ddl struct merge_src_part2 { string key, string value} serialization.format 1 serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1306983711 + transient_lastDdlTime 1317315404 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_src_part2 - tmp directory: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-51_051_6120737160044680268/-ext-10001 + tmp directory: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-44_554_5764124493152347521/-ext-10001 Stage: Stage-2 Stats-Aggr Operator - Stats Aggregation Key Prefix: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-51_051_6120737160044680268/-ext-10000/ + Stats Aggregation Key Prefix: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-44_554_5764124493152347521/-ext-10000/ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-51_051_6120737160044680268/-ext-10002 + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-44_554_5764124493152347521/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 - directory: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-51_051_6120737160044680268/-ext-10000 + directory: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-44_554_5764124493152347521/-ext-10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2469,13 +2464,13 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part2 + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part2 name default.merge_src_part2 partition_columns 
ds serialization.ddl struct merge_src_part2 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1306983711 + transient_lastDdlTime 1317315404 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_src_part2 TotalFiles: 1 @@ -2483,9 +2478,9 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-51_051_6120737160044680268/-ext-10002 [pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-51_051_6120737160044680268/-ext-10002] + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-44_554_5764124493152347521/-ext-10002 [pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-44_554_5764124493152347521/-ext-10002] Path -> Partition: - pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-01-51_051_6120737160044680268/-ext-10002 + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-44_554_5764124493152347521/-ext-10002 Partition base file name: -ext-10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -2496,13 +2491,13 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part2 + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part2 name default.merge_src_part2 partition_columns ds serialization.ddl struct merge_src_part2 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1306983711 + transient_lastDdlTime 1317315404 serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -2513,13 +2508,13 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part2 + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part2 name default.merge_src_part2 partition_columns ds serialization.ddl struct merge_src_part2 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1306983711 + transient_lastDdlTime 1317315404 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_src_part2 name: default.merge_src_part2 @@ -2566,12 +2561,12 @@ PREHOOK: type: QUERY PREHOOK: Input: default@merge_src_part2@ds=2008-04-08 PREHOOK: Input: default@merge_src_part2@ds=2008-04-09 -PREHOOK: Output: file:/tmp/tomasz/hive_2011-06-01_20-02-00_759_2190604757532214555/-mr-10000 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-29_09-56-56_827_5066783980290452555/-mr-10000 POSTHOOK: query: select * from merge_src_part2 where ds is not null POSTHOOK: type: QUERY POSTHOOK: Input: default@merge_src_part2@ds=2008-04-08 POSTHOOK: Input: default@merge_src_part2@ds=2008-04-09 -POSTHOOK: Output: file:/tmp/tomasz/hive_2011-06-01_20-02-00_759_2190604757532214555/-mr-10000 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-29_09-56-56_827_5066783980290452555/-mr-10000 POSTHOOK: Lineage: merge_src_part PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: merge_src_part PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, 
comment:default), ] POSTHOOK: Lineage: merge_src_part PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] @@ -4670,10 +4665,10 @@ type: string Needs Tagging: false Path -> Alias: - pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-08 [s:merge_src_part] - pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-09 [s:merge_src_part] + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-08 [s:merge_src_part] + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-09 [s:merge_src_part] Path -> Partition: - pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-08 + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-08 Partition base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat @@ -4686,18 +4681,13 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-08 + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-08 name default.merge_src_part - numFiles 2 - numPartitions 2 - numRows 1000 partition_columns ds - rawDataSize 10624 serialization.ddl struct merge_src_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 11624 - transient_lastDdlTime 1306983704 + transient_lastDdlTime 1317315393 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -4708,22 +4698,17 @@ columns.types string:string 
file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part name default.merge_src_part - numFiles 4 - numPartitions 2 - numRows 2000 partition_columns ds - rawDataSize 21248 serialization.ddl struct merge_src_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 23248 - transient_lastDdlTime 1306983704 + transient_lastDdlTime 1317315386 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_src_part name: default.merge_src_part - pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-09 + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-09 Partition base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat @@ -4736,18 +4721,13 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-09 + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part/ds=2008-04-09 name default.merge_src_part - numFiles 2 - numPartitions 2 - numRows 1000 partition_columns ds - rawDataSize 10624 serialization.ddl struct merge_src_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 11624 - transient_lastDdlTime 1306983704 + transient_lastDdlTime 1317315394 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: 
org.apache.hadoop.mapred.TextInputFormat @@ -4758,18 +4738,13 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part name default.merge_src_part - numFiles 4 - numPartitions 2 - numRows 2000 partition_columns ds - rawDataSize 21248 serialization.ddl struct merge_src_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 23248 - transient_lastDdlTime 1306983704 + transient_lastDdlTime 1317315386 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_src_part name: default.merge_src_part @@ -4787,9 +4762,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-02-01_691_3177939093965437064/-ext-10002 + directory: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-57_901_4053860490462130628/-ext-10002 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-02-01_691_3177939093965437064/-ext-10000/ + Stats Publishing Key Prefix: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-57_901_4053860490462130628/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4799,13 +4774,13 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location 
pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part2 + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part2 name default.merge_src_part2 partition_columns ds serialization.ddl struct merge_src_part2 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1306983721 + transient_lastDdlTime 1317315417 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_src_part2 TotalFiles: 1 @@ -4819,8 +4794,8 @@ Move Operator files: hdfs directory: true - source: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-02-01_691_3177939093965437064/-ext-10002 - destination: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-02-01_691_3177939093965437064/-ext-10000 + source: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-57_901_4053860490462130628/-ext-10002 + destination: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-57_901_4053860490462130628/-ext-10000 Stage: Stage-0 Move Operator @@ -4828,7 +4803,7 @@ partition: ds replace: true - source: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-02-01_691_3177939093965437064/-ext-10000 + source: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-57_901_4053860490462130628/-ext-10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4838,29 +4813,29 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part2 + location 
pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part2 name default.merge_src_part2 partition_columns ds serialization.ddl struct merge_src_part2 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1306983721 + transient_lastDdlTime 1317315417 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_src_part2 - tmp directory: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-02-01_691_3177939093965437064/-ext-10001 + tmp directory: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-57_901_4053860490462130628/-ext-10001 Stage: Stage-2 Stats-Aggr Operator - Stats Aggregation Key Prefix: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-02-01_691_3177939093965437064/-ext-10000/ + Stats Aggregation Key Prefix: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-57_901_4053860490462130628/-ext-10000/ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-02-01_691_3177939093965437064/-ext-10002 + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-57_901_4053860490462130628/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 - directory: pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-02-01_691_3177939093965437064/-ext-10000 + directory: pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-57_901_4053860490462130628/-ext-10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -4871,13 +4846,13 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part2 + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part2 name default.merge_src_part2 partition_columns ds serialization.ddl struct merge_src_part2 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1306983721 + transient_lastDdlTime 1317315417 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_src_part2 TotalFiles: 1 @@ -4885,9 +4860,9 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-02-01_691_3177939093965437064/-ext-10002 [pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-02-01_691_3177939093965437064/-ext-10002] + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-57_901_4053860490462130628/-ext-10002 [pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-57_901_4053860490462130628/-ext-10002] Path -> Partition: - pfile:/data/users/tomasz/apache-hive/build/ql/scratchdir/hive_2011-06-01_20-02-01_691_3177939093965437064/-ext-10002 + pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/scratchdir/hive_2011-09-29_09-56-57_901_4053860490462130628/-ext-10002 Partition base file name: -ext-10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -4898,13 +4873,13 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part2 + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part2 name 
default.merge_src_part2 partition_columns ds serialization.ddl struct merge_src_part2 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1306983721 + transient_lastDdlTime 1317315417 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -4915,13 +4890,13 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/tomasz/apache-hive/build/ql/test/data/warehouse/merge_src_part2 + location pfile:/Users/kevinwilfong/Documents/hive_ctas_stats/build/ql/test/data/warehouse/merge_src_part2 name default.merge_src_part2 partition_columns ds serialization.ddl struct merge_src_part2 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1306983721 + transient_lastDdlTime 1317315417 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_src_part2 name: default.merge_src_part2 @@ -4976,12 +4951,12 @@ PREHOOK: type: QUERY PREHOOK: Input: default@merge_src_part2@ds=2008-04-08 PREHOOK: Input: default@merge_src_part2@ds=2008-04-09 -PREHOOK: Output: file:/tmp/tomasz/hive_2011-06-01_20-02-09_192_4139404353176082441/-mr-10000 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-29_09-57-06_270_8212449173043851671/-mr-10000 POSTHOOK: query: select * from merge_src_part2 where ds is not null POSTHOOK: type: QUERY POSTHOOK: Input: default@merge_src_part2@ds=2008-04-08 POSTHOOK: Input: default@merge_src_part2@ds=2008-04-09 -POSTHOOK: Output: file:/tmp/tomasz/hive_2011-06-01_20-02-09_192_4139404353176082441/-mr-10000 +POSTHOOK: Output: 
file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-29_09-57-06_270_8212449173043851671/-mr-10000 POSTHOOK: Lineage: merge_src_part PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: merge_src_part PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: merge_src_part PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (revision 1177363) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (working copy) @@ -52,6 +52,7 @@ import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; import org.apache.hadoop.hive.ql.parse.ErrorMsg; import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.QB; import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -126,6 +127,11 @@ addStatsTask(fsOp, mvTask, currTask, parseCtx.getConf()); } + if (parseCtx.getQB().isCTAS() && + hconf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { + createStatsTaskForCTAS(fsOp, mvTask, currTask, parseCtx.getConf(), parseCtx.getQB()); + } + if ((mvTask != null) && !mvTask.isLocal()) { // There are separate configuration parameters to control whether to // merge for a map-only job @@ -154,6 +160,38 @@ return null; } + private void createStatsTaskForCTAS(FileSinkOperator nd, MoveTask mvTask, + Task currTask, HiveConf hconf, QB qb) { + + StatsWork statsWork = new StatsWork(qb.getTableDesc()); + + Task statsTask = createStatsTask(statsWork, nd, mvTask, currTask, hconf); + + 
qb.setStatsTask(statsTask); + } + + private Task createStatsTask(StatsWork statsWork, FileSinkOperator nd, + MoveTask mvTask, Task currTask, HiveConf hconf) { + + MapredWork mrWork = (MapredWork) currTask.getWork(); + + // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix + // in FileSinkDesc is used for stats publishing. They should be consistent. + statsWork.setAggKey(((FileSinkOperator)nd).getConf().getStatsAggPrefix()); + Task statsTask = TaskFactory.get(statsWork, hconf); + + // mark the MapredWork and FileSinkOperator for gathering stats + nd.getConf().setGatherStats(true); + mrWork.setGatheringStats(true); + // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName()); + + // subscribe feeds from the MoveTask so that MoveTask can forward the list + // of dynamic partition list to the StatsTask + statsTask.subscribeFeed(mvTask); + + return statsTask; + } + /** * Add the StatsTask as a dependent task of the MoveTask * because StatsTask will change the Table/Partition metadata. For atomicity, we @@ -168,22 +206,10 @@ MoveWork mvWork = ((MoveTask)mvTask).getWork(); StatsWork statsWork = new StatsWork(mvWork.getLoadTableWork()); - MapredWork mrWork = (MapredWork) currTask.getWork(); - // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix - // in FileSinkDesc is used for stats publishing. They should be consistent. 
- statsWork.setAggKey(((FileSinkOperator)nd).getConf().getStatsAggPrefix()); - Task statsTask = TaskFactory.get(statsWork, hconf); + Task statsTask = createStatsTask(statsWork, nd, mvTask, currTask, hconf); - // mark the MapredWork and FileSinkOperator for gathering stats - nd.getConf().setGatherStats(true); - mrWork.setGatheringStats(true); - // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName()); - - // subscribe feeds from the MoveTask so that MoveTask can forward the list - // of dynamic partition list to the StatsTask mvTask.addDependentTask(statsTask); - statsTask.subscribeFeed(mvTask); } private void createMapReduce4Merge(FileSinkOperator fsOp, GenMRProcContext ctx, String finalName) Index: ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java (revision 1177363) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java (working copy) @@ -216,15 +216,24 @@ public int execute(DriverContext driverContext) { LOG.info("Executing stats task"); - // Make sure that it is either an ANALYZE command or an INSERT OVERWRITE command - assert (work.getLoadTableDesc() != null && work.getTableSpecs() == null || work - .getLoadTableDesc() == null && work.getTableSpecs() != null); + + // Make sure that it is either an ANALYZE command or an INSERT OVERWRITE command or a CTAS command + boolean isAnalyze = work.getLoadTableDesc() != null && work.getTableSpecs() == null && + work.getCreateTableDesc() == null; + boolean isInsertOverwrite = work.getLoadTableDesc() == null && work.getTableSpecs() != null && + work.getCreateTableDesc() == null; + boolean isCTAS = work.getLoadTableDesc() == null && work.getTableSpecs() == null && + work.getCreateTableDesc() != null; + assert (isAnalyze || isInsertOverwrite || isCTAS); + String tableName = ""; try { if (work.getLoadTableDesc() != null) { tableName = 
work.getLoadTableDesc().getTable().getTableName(); - } else { + } else if (work.getTableSpecs() != null) { tableName = work.getTableSpecs().tableName; + } else if (work.getCreateTableDesc() != null) { + tableName = work.getCreateTableDesc().getTableName(); } table = db.getTable(tableName); @@ -310,7 +319,7 @@ // In case of a non-partitioned table, the key for stats temporary store is "rootDir" if (statsAggregator != null) { - updateStats(collectableStats, tblStats, statsAggregator, parameters, + updateStats(collectableStats, tblStats, statsAggregator, parameters, work.getAggKey(), atomic); statsAggregator.cleanUp(work.getAggKey()); } @@ -349,7 +358,7 @@ LOG.info("Stats aggregator : " + partitionID); if (statsAggregator != null) { - updateStats(collectableStats, newPartStats, statsAggregator, + updateStats(collectableStats, newPartStats, statsAggregator, parameters, partitionID, atomic); } else { for (String statType : collectableStats) { @@ -447,7 +456,7 @@ if (value != null) { longValue = Long.parseLong(value); - if (work.getLoadTableDesc() != null && + if (work.getLoadTableDesc() != null && !work.getLoadTableDesc().getReplace()) { String originalValue = parameters.get(statType); if (originalValue != null) { @@ -509,7 +518,11 @@ Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false); list.add(partn); } + } else if (work.getCreateTableDesc() != null) { + // CTAS does not support partitioned tables + return null; } + return list; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java (revision 1177363) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java (working copy) @@ -30,10 +30,11 @@ public class StatsWork implements Serializable { private static final long serialVersionUID = 1L; - private tableSpec tableSpecs; // source table spec -- for TableScanOperator - private LoadTableDesc loadTableDesc; 
// same as MoveWork.loadTableDesc -- for FileSinkOperator - private String aggKey; // aggregation key prefix - + private tableSpec tableSpecs; // source table spec -- for TableScanOperator + private LoadTableDesc loadTableDesc; // same as MoveWork.loadTableDesc -- for FileSinkOperator + private CreateTableDesc createTableDesc; // table to be created spec -- for FileSinkOperator for CTAS + private String aggKey; // aggregation key prefix + private boolean noStatsAggregator = false; public StatsWork() { @@ -47,6 +48,10 @@ this.loadTableDesc = loadTableDesc; } + public StatsWork(CreateTableDesc createTableDesc) { + this.createTableDesc = createTableDesc; + } + public tableSpec getTableSpecs() { return tableSpecs; } @@ -55,6 +60,10 @@ return loadTableDesc; } + public CreateTableDesc getCreateTableDesc() { + return createTableDesc; + } + public void setAggKey(String aggK) { aggKey = aggK; } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java (revision 1177363) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java (working copy) @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.parse; +import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -25,6 +26,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.plan.CreateTableDesc; /** @@ -49,6 +51,9 @@ private String id; private boolean isQuery; private CreateTableDesc tblDesc = null; // table descriptor of the final + // if the qb is for a CTAS, the file sink operator at the end of the select will populate this + // with the stats task for the move task. 
+ private Task<? extends Serializable> statsTask = null; // results @@ -201,4 +206,12 @@ public boolean isCTAS() { return tblDesc != null; } + + public Task<? extends Serializable> getStatsTask() { + return statsTask; + } + + public void setStatsTask(Task<? extends Serializable> statsTask) { + this.statsTask = statsTask; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1177363) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -7013,6 +7013,12 @@ Task<? extends Serializable> crtTblTask = TaskFactory.get(new DDLWork( getInputs(), getOutputs(), crtTblDesc), conf); + // Add the stats task that should have been created by the file sink operator + // at the end of the select + if (qb.getStatsTask() != null) { + crtTblTask.addDependentTask(qb.getStatsTask()); + } + // find all leaf tasks and make the DDLTask as a dependent task of all of // them HashSet<Task<? extends Serializable>> leaves = new HashSet<Task<? extends Serializable>>();