Index: conf/hive-default.xml
===================================================================
--- conf/hive-default.xml	(revision 1044064)
+++ conf/hive-default.xml	(working copy)
@@ -686,6 +686,12 @@
+<property>
+  <name>hive.stats.autogather.read</name>
+  <value>false</value>
+  <description>A flag to gather statistics automatically during the SELECT command.</description>
+</property>
+
 <property>
   <name>hive.stats.jdbcdriver</name>
   <value>org.apache.derby.jdbc.EmbeddedDriver</value>
   <description>The JDBC driver for the database that stores temporary hive statistics.</description>
Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(revision 1044064)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(working copy)
@@ -287,7 +287,8 @@
     HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true),

     // Statistics
-    HIVESTATSAUTOGATHER("hive.stats.autogather", true),
+    HIVESTATSAUTOGATHER("hive.stats.autogather", true), // autogather stats on write?
+    HIVESTATSAUTOGATHERREAD("hive.stats.autogather.read", false), // autogather stats on read?
     HIVESTATSDBCLASS("hive.stats.dbclass", "jdbc:derby"), // other options are jdbc:mysql and hbase as defined in StatsSetupConst.java
     HIVESTATSJDBCDRIVER("hive.stats.jdbcdriver",
Index: ql/src/test/results/clientpositive/piggyback_part.q.out
===================================================================
--- ql/src/test/results/clientpositive/piggyback_part.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/piggyback_part.q.out	(revision 0)
@@ -0,0 +1,1170 @@
+PREHOOK: query: CREATE TABLE piggy_part (key int, value string) PARTITIONED BY (ds string, hr int) STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE piggy_part (key int, value string) PARTITIONED BY (ds string, hr int) STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@piggy_part
+PREHOOK: query: INSERT OVERWRITE TABLE piggy_part PARTITION (ds='2008-04-08', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 11
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Output: default@piggy_part@ds=2008-04-08/hr=11
+POSTHOOK: query: INSERT OVERWRITE TABLE piggy_part PARTITION (ds='2008-04-08', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 11
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@piggy_part@ds=2008-04-08/hr=11
+POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: INSERT OVERWRITE TABLE piggy_part PARTITION (ds='2008-04-08', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 12
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@piggy_part@ds=2008-04-08/hr=12
+POSTHOOK: query: INSERT OVERWRITE TABLE piggy_part PARTITION (ds='2008-04-08', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 12
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@piggy_part@ds=2008-04-08/hr=12
+POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE
[(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE piggy_part PARTITION (ds='2008-04-09', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 11 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Output: default@piggy_part@ds=2008-04-09/hr=11 +POSTHOOK: query: INSERT OVERWRITE TABLE piggy_part PARTITION (ds='2008-04-09', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@piggy_part@ds=2008-04-09/hr=11 +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE piggy_part PARTITION (ds='2008-04-09', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 12 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@piggy_part@ds=2008-04-09/hr=12 +POSTHOOK: query: INSERT OVERWRITE TABLE piggy_part PARTITION (ds='2008-04-09', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 12 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@piggy_part@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=12).value SIMPLE 
[(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain select key from piggy_part where ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from piggy_part where ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF piggy_part)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2008-04-08')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + piggy_part + TableScan + alias: piggy_part + Filter Operator + predicate: + expr: (ds = '2008-04-08') + type: boolean + Select Operator + expressions: + expr: key + type: int + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select key from piggy_part where ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@piggy_part@ds=2008-04-08/hr=11 +PREHOOK: Input: default@piggy_part@ds=2008-04-08/hr=12 +PREHOOK: Output: file:/tmp/pbutler/hive_2010-11-30_14-28-48_428_5095573749248674967/-mr-10000 +POSTHOOK: query: select key from piggy_part where ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@piggy_part@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@piggy_part@ds=2008-04-08/hr=12 +POSTHOOK: Output: file:/tmp/pbutler/hive_2010-11-30_14-28-48_428_5095573749248674967/-mr-10000 +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, 
type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +238 +86 +311 +27 +165 +409 +255 +278 +98 +484 +265 +193 +401 +150 +273 +224 +369 +66 +128 +213 +146 +406 +429 +374 +152 +469 +145 +495 +37 +327 +281 +277 +209 +15 +82 +403 +166 +417 +430 +252 +292 +219 +287 +153 +193 +338 +446 +459 +394 +237 +482 +174 +413 +494 +207 +199 +466 +208 +174 +399 +396 +247 +417 +489 +162 +377 +397 +309 +365 +266 +439 +342 +367 +325 +167 +195 +475 +17 +113 +155 +203 +339 +0 +455 +128 +311 +316 +57 +302 +205 +149 +438 +345 +129 +170 +20 +489 +157 +378 +221 +92 +111 +47 +72 +4 +280 +35 +427 +277 +208 +356 +399 +169 +382 +498 +125 +386 +437 +469 +192 +286 +187 +176 +54 +459 +51 +138 +103 +239 +213 +216 +430 +278 +176 +289 +221 +65 +318 +332 +311 +275 +137 +241 +83 +333 +180 +284 +12 +230 +181 +67 +260 +404 +384 +489 +353 +373 +272 +138 +217 +84 +348 +466 +58 +8 +411 +230 +208 +348 +24 +463 +431 +179 +172 +42 +129 +158 +119 +496 +0 +322 +197 +468 +393 +454 +100 +298 +199 +191 +418 +96 +26 +165 +327 +230 +205 +120 +131 +51 +404 +43 +436 +156 +469 +468 +308 +95 +196 +288 +481 +457 +98 +282 +197 +187 +318 +318 +409 +470 +137 +369 +316 +169 +413 +85 +77 +0 +490 +87 +364 +179 +118 +134 +395 +282 +138 +238 +419 +15 +118 +72 +90 +307 +19 +435 +10 +277 +273 +306 +224 +309 +389 +327 +242 +369 +392 +272 +331 +401 +242 +452 +177 +226 +5 +497 +402 +396 +317 +395 +58 +35 +336 +95 +11 +168 +34 +229 +233 +143 +472 +322 +498 +160 +195 +42 +321 +430 +119 +489 +458 +78 +76 +41 +223 +492 +149 +449 +218 +228 +138 +453 +30 +209 +64 +468 +76 +74 +342 +69 +230 +33 +368 +103 +296 +113 +216 +367 +344 +167 +274 +219 +239 +485 +116 +223 +256 +263 +70 +487 +480 +401 +288 +191 +5 +244 +438 +128 +467 +432 +202 +316 +229 +469 +463 +280 +2 +35 +283 +331 +235 +80 +44 +193 +321 +335 +104 +466 +366 +175 +403 +483 +53 +105 +257 +406 +409 +190 +406 +401 +114 +258 +90 +203 +262 +348 +424 +12 +396 +201 +217 +164 +431 +454 +478 +298 +125 +431 +164 +424 +187 +382 +5 +70 +397 +480 +291 +24 +351 +255 +104 +70 +163 +438 +119 +414 +200 +491 +237 +439 +360 +248 +479 +305 +417 +199 +444 +120 +429 +169 +443 +323 +325 +277 +230 +478 +178 +468 +310 +317 +333 +493 +460 +207 +249 +265 +480 +83 +136 +353 +172 +214 +462 +233 +406 +133 +175 +189 +454 +375 +401 +421 +407 +384 +256 +26 +134 +67 +384 +379 +18 +462 +492 +100 +298 +9 +341 +498 +146 +458 +362 +186 +285 +348 +167 +18 +273 +183 +281 +344 +97 +469 +315 +84 +28 +37 +448 +152 +348 +307 +194 +414 +477 +222 +126 +90 +169 +403 +400 +200 +97 +238 +86 +311 +27 +165 +409 +255 +278 +98 +484 +265 +193 +401 +150 +273 +224 +369 +66 +128 +213 +146 +406 +429 +374 +152 +469 +145 +495 +37 +327 +281 +277 +209 +15 +82 +403 +166 +417 +430 +252 +292 +219 +287 +153 +193 +338 +446 +459 +394 +237 +482 +174 +413 +494 +207 +199 +466 +208 +174 +399 +396 +247 +417 +489 +162 +377 +397 +309 +365 +266 +439 +342 +367 +325 +167 +195 +475 +17 +113 +155 +203 +339 +0 +455 +128 +311 +316 +57 +302 +205 +149 +438 +345 +129 +170 +20 +489 +157 +378 +221 +92 +111 +47 +72 +4 +280 
+35 +427 +277 +208 +356 +399 +169 +382 +498 +125 +386 +437 +469 +192 +286 +187 +176 +54 +459 +51 +138 +103 +239 +213 +216 +430 +278 +176 +289 +221 +65 +318 +332 +311 +275 +137 +241 +83 +333 +180 +284 +12 +230 +181 +67 +260 +404 +384 +489 +353 +373 +272 +138 +217 +84 +348 +466 +58 +8 +411 +230 +208 +348 +24 +463 +431 +179 +172 +42 +129 +158 +119 +496 +0 +322 +197 +468 +393 +454 +100 +298 +199 +191 +418 +96 +26 +165 +327 +230 +205 +120 +131 +51 +404 +43 +436 +156 +469 +468 +308 +95 +196 +288 +481 +457 +98 +282 +197 +187 +318 +318 +409 +470 +137 +369 +316 +169 +413 +85 +77 +0 +490 +87 +364 +179 +118 +134 +395 +282 +138 +238 +419 +15 +118 +72 +90 +307 +19 +435 +10 +277 +273 +306 +224 +309 +389 +327 +242 +369 +392 +272 +331 +401 +242 +452 +177 +226 +5 +497 +402 +396 +317 +395 +58 +35 +336 +95 +11 +168 +34 +229 +233 +143 +472 +322 +498 +160 +195 +42 +321 +430 +119 +489 +458 +78 +76 +41 +223 +492 +149 +449 +218 +228 +138 +453 +30 +209 +64 +468 +76 +74 +342 +69 +230 +33 +368 +103 +296 +113 +216 +367 +344 +167 +274 +219 +239 +485 +116 +223 +256 +263 +70 +487 +480 +401 +288 +191 +5 +244 +438 +128 +467 +432 +202 +316 +229 +469 +463 +280 +2 +35 +283 +331 +235 +80 +44 +193 +321 +335 +104 +466 +366 +175 +403 +483 +53 +105 +257 +406 +409 +190 +406 +401 +114 +258 +90 +203 +262 +348 +424 +12 +396 +201 +217 +164 +431 +454 +478 +298 +125 +431 +164 +424 +187 +382 +5 +70 +397 +480 +291 +24 +351 +255 +104 +70 +163 +438 +119 +414 +200 +491 +237 +439 +360 +248 +479 +305 +417 +199 +444 +120 +429 +169 +443 +323 +325 +277 +230 +478 +178 +468 +310 +317 +333 +493 +460 +207 +249 +265 +480 +83 +136 +353 +172 +214 +462 +233 +406 +133 +175 +189 +454 +375 +401 +421 +407 +384 +256 +26 +134 +67 +384 +379 +18 +462 +492 +100 +298 +9 +341 +498 +146 +458 +362 +186 +285 +348 +167 +18 +273 +183 +281 +344 +97 +469 +315 +84 +28 +37 +448 +152 +348 +307 +194 +414 +477 +222 +126 +90 +169 +403 +400 +200 +97 +PREHOOK: query: show table extended like piggy_part +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like piggy_part +POSTHOOK: type: SHOW_TABLESTATUS +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +tableName:piggy_part +owner:pbutler +location:pfile:/home/pbutler/hive-git/build/ql/test/data/warehouse/piggy_part +inputformat:org.apache.hadoop.hive.ql.io.RCFileInputFormat +outputformat:org.apache.hadoop.hive.ql.io.RCFileOutputFormat +columns:struct columns { i32 key, string value} 
+rows:2000 +partitioned:true +partitionColumns:struct partition_columns { string ds, i32 hr} +totalNumberFiles:4 +totalFileSize:21552 +maxFileSize:5388 +minFileSize:5388 +lastAccessTime:0 +lastUpdateTime:1291156126000 + +PREHOOK: query: drop table piggy_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@piggy_part +PREHOOK: Output: default@piggy_part +POSTHOOK: query: drop table piggy_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@piggy_part +POSTHOOK: Output: default@piggy_part +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: piggy_part PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] Index: ql/src/test/results/clientpositive/piggyback_create.q.out =================================================================== --- ql/src/test/results/clientpositive/piggyback_create.q.out (revision 0) +++ ql/src/test/results/clientpositive/piggyback_create.q.out (revision 0) @@ -0,0 +1,622 @@ +PREHOOK: query: create table piggy_table1 as select * from src +PREHOOK: type: CREATETABLE +PREHOOK: Input: default@src +POSTHOOK: query: create table piggy_table1 as select * from src +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: default@src +POSTHOOK: Output: default@piggy_table1 +PREHOOK: query: explain create table my_table as select key from piggy_table1 +PREHOOK: type: CREATETABLE +POSTHOOK: query: explain create table my_table as select key from piggy_table1 +POSTHOOK: type: CREATETABLE +ABSTRACT SYNTAX TREE: + (TOK_CREATETABLE my_table TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF piggy_table1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2 + Stage-3 + Stage-0 depends on stages: Stage-3, Stage-2 + Stage-6 depends on stages: Stage-0, Stage-5 + Stage-2 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + piggy_table1 + TableScan + alias: piggy_table1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-4 + Conditional Operator + + Stage: Stage-3 + Move Operator + files: + hdfs directory: true + destination: 
pfile:/home/pbutler/hive-git/build/ql/scratchdir/hive_2010-11-30_13-58-19_732_6987326443406958956/-ext-10001 + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true + destination: pfile:///home/pbutler/hive-git/build/ql/test/data/warehouse/my_table + + Stage: Stage-6 + Create Table Operator: + Create Table + columns: key string + if not exists: false + input format: org.apache.hadoop.mapred.TextInputFormat + # buckets: -1 + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + name: my_table + isExternal: false + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + pfile:/home/pbutler/hive-git/build/ql/scratchdir/hive_2010-11-30_13-58-19_732_6987326443406958956/-ext-10002 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-5 + Stats-Aggr Operator + + +PREHOOK: query: create table my_table as select key from piggy_table1 +PREHOOK: type: CREATETABLE +PREHOOK: Input: default@piggy_table1 +POSTHOOK: query: create table my_table as select key from piggy_table1 +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: default@piggy_table1 +POSTHOOK: Output: default@my_table +PREHOOK: query: show table extended like piggy_table1 +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like piggy_table1 +POSTHOOK: type: SHOW_TABLESTATUS +tableName:piggy_table1 +owner:pbutler +location:pfile:/home/pbutler/hive-git/build/ql/test/data/warehouse/piggy_table1 +inputformat:org.apache.hadoop.mapred.TextInputFormat +outputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +columns:struct columns { string key, string value} +rows:500 +partitioned:false +partitionColumns: +totalNumberFiles:1 +totalFileSize:5812 +maxFileSize:5812 +minFileSize:5812 +lastAccessTime:0 +lastUpdateTime:1291154299000 + +PREHOOK: query: select * from my_table +PREHOOK: type: QUERY +PREHOOK: Input: default@my_table +PREHOOK: Output: file:/tmp/pbutler/hive_2010-11-30_13-58-29_324_6499932653063794659/-mr-10000 +POSTHOOK: query: select * from my_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@my_table +POSTHOOK: Output: file:/tmp/pbutler/hive_2010-11-30_13-58-29_324_6499932653063794659/-mr-10000 +238 +86 +311 +27 +165 +409 +255 +278 +98 +484 +265 +193 +401 +150 +273 +224 +369 +66 +128 +213 +146 +406 +429 +374 +152 +469 +145 +495 +37 +327 +281 +277 +209 +15 +82 +403 +166 +417 +430 +252 +292 +219 +287 +153 +193 +338 +446 +459 +394 +237 +482 +174 +413 +494 +207 +199 +466 +208 +174 +399 +396 +247 +417 +489 +162 +377 +397 +309 +365 +266 +439 +342 +367 +325 +167 +195 +475 +17 +113 +155 +203 +339 +0 +455 +128 +311 +316 +57 +302 +205 +149 +438 +345 +129 +170 +20 +489 +157 +378 +221 +92 +111 +47 +72 +4 +280 +35 +427 +277 +208 +356 +399 +169 +382 +498 +125 +386 +437 +469 +192 +286 +187 +176 +54 +459 +51 +138 +103 +239 +213 +216 +430 +278 +176 +289 +221 +65 +318 +332 +311 +275 +137 +241 +83 +333 +180 +284 +12 +230 +181 +67 +260 +404 +384 +489 +353 +373 +272 +138 +217 +84 +348 +466 +58 +8 +411 +230 +208 +348 +24 +463 +431 +179 +172 +42 +129 +158 +119 +496 +0 +322 +197 +468 +393 +454 +100 +298 +199 +191 +418 +96 +26 +165 +327 +230 +205 +120 +131 +51 +404 +43 +436 +156 +469 +468 +308 +95 +196 +288 +481 +457 +98 +282 +197 +187 +318 +318 +409 +470 +137 +369 +316 +169 +413 +85 +77 +0 +490 +87 +364 +179 +118 +134 +395 +282 +138 +238 +419 +15 +118 +72 +90 +307 +19 +435 +10 +277 +273 +306 +224 +309 +389 +327 +242 +369 +392 +272 
+331 +401 +242 +452 +177 +226 +5 +497 +402 +396 +317 +395 +58 +35 +336 +95 +11 +168 +34 +229 +233 +143 +472 +322 +498 +160 +195 +42 +321 +430 +119 +489 +458 +78 +76 +41 +223 +492 +149 +449 +218 +228 +138 +453 +30 +209 +64 +468 +76 +74 +342 +69 +230 +33 +368 +103 +296 +113 +216 +367 +344 +167 +274 +219 +239 +485 +116 +223 +256 +263 +70 +487 +480 +401 +288 +191 +5 +244 +438 +128 +467 +432 +202 +316 +229 +469 +463 +280 +2 +35 +283 +331 +235 +80 +44 +193 +321 +335 +104 +466 +366 +175 +403 +483 +53 +105 +257 +406 +409 +190 +406 +401 +114 +258 +90 +203 +262 +348 +424 +12 +396 +201 +217 +164 +431 +454 +478 +298 +125 +431 +164 +424 +187 +382 +5 +70 +397 +480 +291 +24 +351 +255 +104 +70 +163 +438 +119 +414 +200 +491 +237 +439 +360 +248 +479 +305 +417 +199 +444 +120 +429 +169 +443 +323 +325 +277 +230 +478 +178 +468 +310 +317 +333 +493 +460 +207 +249 +265 +480 +83 +136 +353 +172 +214 +462 +233 +406 +133 +175 +189 +454 +375 +401 +421 +407 +384 +256 +26 +134 +67 +384 +379 +18 +462 +492 +100 +298 +9 +341 +498 +146 +458 +362 +186 +285 +348 +167 +18 +273 +183 +281 +344 +97 +469 +315 +84 +28 +37 +448 +152 +348 +307 +194 +414 +477 +222 +126 +90 +169 +403 +400 +200 +97 +PREHOOK: query: drop table piggy_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table piggy_table +POSTHOOK: type: DROPTABLE Index: ql/src/test/results/clientpositive/piggyback_limit.q.out =================================================================== --- ql/src/test/results/clientpositive/piggyback_limit.q.out (revision 0) +++ ql/src/test/results/clientpositive/piggyback_limit.q.out (revision 0) @@ -0,0 +1,59 @@ +PREHOOK: query: create table piggy_table1 as select * from src1 +PREHOOK: type: CREATETABLE +PREHOOK: Input: default@src1 +POSTHOOK: query: create table piggy_table1 as select * from src1 +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@piggy_table1 +PREHOOK: query: explain select key from piggy_table1 limit 4 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from piggy_table1 limit 4 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF piggy_table1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_LIMIT 4))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + piggy_table1 + TableScan + alias: piggy_table1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: 4 + + +PREHOOK: query: select key from piggy_table1 limit 4 +PREHOOK: type: QUERY +PREHOOK: Input: default@piggy_table1 +PREHOOK: Output: file:/tmp/pbutler/hive_2010-12-02_11-56-53_971_3590330924627005035/-mr-10000 +POSTHOOK: query: select key from piggy_table1 limit 4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@piggy_table1 +POSTHOOK: Output: file:/tmp/pbutler/hive_2010-12-02_11-56-53_971_3590330924627005035/-mr-10000 +238 + +311 + +PREHOOK: query: drop table piggy_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table piggy_table +POSTHOOK: type: DROPTABLE Index: ql/src/test/results/clientpositive/piggyback.q.out =================================================================== --- ql/src/test/results/clientpositive/piggyback.q.out 
(revision 0) +++ ql/src/test/results/clientpositive/piggyback.q.out (revision 0) @@ -0,0 +1,582 @@ +PREHOOK: query: create table piggy_table1 as select * from src +PREHOOK: type: CREATETABLE +PREHOOK: Input: default@src +POSTHOOK: query: create table piggy_table1 as select * from src +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: default@src +POSTHOOK: Output: default@piggy_table1 +PREHOOK: query: explain select key from piggy_table1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from piggy_table1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF piggy_table1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + piggy_table1 + TableScan + alias: piggy_table1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select key from piggy_table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@piggy_table1 +PREHOOK: Output: file:/tmp/pbutler/hive_2010-11-30_13-53-58_399_6727946341282218376/-mr-10000 +POSTHOOK: query: select key from piggy_table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@piggy_table1 +POSTHOOK: Output: file:/tmp/pbutler/hive_2010-11-30_13-53-58_399_6727946341282218376/-mr-10000 +238 +86 +311 +27 +165 +409 +255 +278 +98 +484 +265 +193 +401 +150 +273 +224 +369 +66 +128 +213 +146 +406 +429 +374 +152 +469 +145 +495 +37 +327 +281 +277 +209 +15 +82 +403 +166 +417 +430 +252 +292 +219 +287 +153 +193 +338 +446 +459 +394 +237 +482 +174 +413 +494 +207 +199 +466 +208 +174 +399 +396 +247 +417 +489 +162 +377 +397 +309 +365 +266 +439 +342 +367 +325 +167 +195 +475 +17 +113 +155 +203 +339 +0 +455 +128 +311 +316 +57 +302 +205 +149 +438 +345 +129 +170 +20 +489 +157 +378 +221 +92 +111 +47 +72 +4 +280 +35 +427 +277 +208 +356 +399 +169 +382 +498 +125 +386 +437 +469 +192 +286 +187 +176 +54 +459 +51 +138 +103 +239 +213 +216 +430 +278 +176 +289 +221 +65 +318 +332 +311 +275 +137 +241 +83 +333 +180 +284 +12 +230 +181 +67 +260 +404 +384 +489 +353 +373 +272 +138 +217 +84 +348 +466 +58 +8 +411 +230 +208 +348 +24 +463 +431 +179 +172 +42 +129 +158 +119 +496 +0 +322 +197 +468 +393 +454 +100 +298 +199 +191 +418 +96 +26 +165 +327 +230 +205 +120 +131 +51 +404 +43 +436 +156 +469 +468 +308 +95 +196 +288 +481 +457 +98 +282 +197 +187 +318 +318 +409 +470 +137 +369 +316 +169 +413 +85 +77 +0 +490 +87 +364 +179 +118 +134 +395 +282 +138 +238 +419 +15 +118 +72 +90 +307 +19 +435 +10 +277 +273 +306 +224 +309 +389 +327 +242 +369 +392 +272 +331 +401 +242 +452 +177 +226 +5 +497 +402 +396 +317 +395 +58 +35 +336 +95 +11 +168 +34 +229 +233 +143 +472 +322 +498 +160 +195 +42 +321 +430 +119 +489 +458 +78 +76 +41 +223 +492 +149 +449 +218 +228 +138 +453 +30 +209 +64 +468 +76 +74 +342 +69 +230 +33 +368 +103 +296 +113 +216 +367 +344 +167 +274 +219 +239 +485 +116 +223 +256 +263 +70 +487 +480 +401 +288 +191 +5 +244 +438 +128 +467 +432 +202 +316 +229 +469 +463 +280 +2 +35 +283 +331 +235 +80 +44 +193 +321 +335 +104 +466 +366 +175 +403 +483 +53 +105 +257 +406 +409 +190 +406 +401 +114 +258 +90 +203 +262 +348 +424 +12 
+396 +201 +217 +164 +431 +454 +478 +298 +125 +431 +164 +424 +187 +382 +5 +70 +397 +480 +291 +24 +351 +255 +104 +70 +163 +438 +119 +414 +200 +491 +237 +439 +360 +248 +479 +305 +417 +199 +444 +120 +429 +169 +443 +323 +325 +277 +230 +478 +178 +468 +310 +317 +333 +493 +460 +207 +249 +265 +480 +83 +136 +353 +172 +214 +462 +233 +406 +133 +175 +189 +454 +375 +401 +421 +407 +384 +256 +26 +134 +67 +384 +379 +18 +462 +492 +100 +298 +9 +341 +498 +146 +458 +362 +186 +285 +348 +167 +18 +273 +183 +281 +344 +97 +469 +315 +84 +28 +37 +448 +152 +348 +307 +194 +414 +477 +222 +126 +90 +169 +403 +400 +200 +97 +PREHOOK: query: show table extended like piggy_table1 +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like piggy_table1 +POSTHOOK: type: SHOW_TABLESTATUS +tableName:piggy_table1 +owner:pbutler +location:pfile:/home/pbutler/hive-git/build/ql/test/data/warehouse/piggy_table1 +inputformat:org.apache.hadoop.mapred.TextInputFormat +outputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +columns:struct columns { string key, string value} +rows:500 +partitioned:false +partitionColumns: +totalNumberFiles:1 +totalFileSize:5812 +maxFileSize:5812 +minFileSize:5812 +lastAccessTime:0 +lastUpdateTime:1291154038000 + +PREHOOK: query: drop table piggy_table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@piggy_table1 +PREHOOK: Output: default@piggy_table1 +POSTHOOK: query: drop table piggy_table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@piggy_table1 +POSTHOOK: Output: default@piggy_table1 Index: ql/src/test/results/clientpositive/piggyback_join.q.out =================================================================== --- ql/src/test/results/clientpositive/piggyback_join.q.out (revision 0) +++ ql/src/test/results/clientpositive/piggyback_join.q.out (revision 0) @@ -0,0 +1,497 @@ +PREHOOK: query: create table piggy_table1 as select * from src +PREHOOK: type: CREATETABLE +PREHOOK: Input: default@src +POSTHOOK: query: create table piggy_table1 as select * from src +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: default@src +POSTHOOK: Output: default@piggy_table1 +PREHOOK: query: create table piggy_table2 as select * from src1 +PREHOOK: type: CREATETABLE +PREHOOK: Input: default@src1 +POSTHOOK: query: create table piggy_table2 as select * from src1 +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@piggy_table2 +PREHOOK: query: create table piggy_table3 as select * from src1 +PREHOOK: type: CREATETABLE +PREHOOK: Input: default@src1 +POSTHOOK: query: create table piggy_table3 as select * from src1 +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@piggy_table3 +PREHOOK: query: explain +select + piggy_table2.key, + piggy_table2.value, + piggy_table1.value +from + piggy_table2 +join + piggy_table1 +on + (piggy_table2.key = piggy_table1.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + piggy_table2.key, + piggy_table2.value, + piggy_table1.value +from + piggy_table2 +join + piggy_table1 +on + (piggy_table2.key = piggy_table1.key) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF piggy_table2) (TOK_TABREF piggy_table1) (= (. (TOK_TABLE_OR_COL piggy_table2) key) (. (TOK_TABLE_OR_COL piggy_table1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL piggy_table2) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL piggy_table2) value)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL piggy_table1) value))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-3 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + piggy_table1 + TableScan + alias: piggy_table1 + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: value + type: string + piggy_table2 + TableScan + alias: piggy_table2 + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select + piggy_table2.key, + piggy_table2.value, + piggy_table1.value +from + piggy_table2 +join + piggy_table1 +on + (piggy_table2.key = piggy_table1.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@piggy_table1 +PREHOOK: Input: default@piggy_table2 +PREHOOK: Output: file:/tmp/pbutler/hive_2010-11-30_14-09-03_491_3626721076355980777/-mr-10000 +POSTHOOK: query: select + piggy_table2.key, + piggy_table2.value, + piggy_table1.value +from + piggy_table2 +join + piggy_table1 +on + (piggy_table2.key = piggy_table1.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@piggy_table1 +POSTHOOK: Input: default@piggy_table2 +POSTHOOK: Output: file:/tmp/pbutler/hive_2010-11-30_14-09-03_491_3626721076355980777/-mr-10000 +128 val_128 +128 val_128 +128 val_128 +146 val_146 val_146 +146 val_146 val_146 +150 val_150 val_150 +213 val_213 val_213 +213 val_213 val_213 +224 val_224 +224 val_224 +238 val_238 val_238 +238 val_238 val_238 +255 val_255 val_255 +255 val_255 val_255 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +278 val_278 val_278 +278 val_278 val_278 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +369 val_369 +369 val_369 +369 val_369 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +66 val_66 val_66 +98 val_98 val_98 +98 val_98 val_98 +PREHOOK: query: show table extended like piggy_table1 +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like piggy_table1 +POSTHOOK: type: SHOW_TABLESTATUS +tableName:piggy_table1 +owner:pbutler +location:pfile:/home/pbutler/hive-git/build/ql/test/data/warehouse/piggy_table1 +inputformat:org.apache.hadoop.mapred.TextInputFormat +outputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +columns:struct columns { string key, string value} +rows:500 +partitioned:false +partitionColumns: +totalNumberFiles:1 +totalFileSize:5812 +maxFileSize:5812 
+minFileSize:5812 +lastAccessTime:0 +lastUpdateTime:1291154932000 + +PREHOOK: query: show table extended like piggy_table2 +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like piggy_table2 +POSTHOOK: type: SHOW_TABLESTATUS +tableName:piggy_table2 +owner:pbutler +location:pfile:/home/pbutler/hive-git/build/ql/test/data/warehouse/piggy_table2 +inputformat:org.apache.hadoop.mapred.TextInputFormat +outputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +columns:struct columns { string key, string value} +rows:25 +partitioned:false +partitionColumns: +totalNumberFiles:1 +totalFileSize:216 +maxFileSize:216 +minFileSize:216 +lastAccessTime:0 +lastUpdateTime:1291154938000 + +PREHOOK: query: explain +select + piggy_table2.key, + piggy_table2.value, + piggy_table1.value, + piggy_table3.value +from + piggy_table2 +join + piggy_table1 +on + (piggy_table2.key = piggy_table1.key) +join + piggy_table3 +on + (piggy_table1.value = piggy_table3.value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + piggy_table2.key, + piggy_table2.value, + piggy_table1.value, + piggy_table3.value +from + piggy_table2 +join + piggy_table1 +on + (piggy_table2.key = piggy_table1.key) +join + piggy_table3 +on + (piggy_table1.value = piggy_table3.value) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF piggy_table2) (TOK_TABREF piggy_table1) (= (. (TOK_TABLE_OR_COL piggy_table2) key) (. (TOK_TABLE_OR_COL piggy_table1) key))) (TOK_TABREF piggy_table3) (= (. (TOK_TABLE_OR_COL piggy_table1) value) (. (TOK_TABLE_OR_COL piggy_table3) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL piggy_table2) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL piggy_table2) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL piggy_table1) value)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL piggy_table3) value))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + piggy_table1 + TableScan + alias: piggy_table1 + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: value + type: string + piggy_table2 + TableScan + alias: piggy_table2 + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col5 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Reduce Output Operator + key expressions: + expr: _col5 + type: string + sort order: + + Map-reduce partition columns: + expr: _col5 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col5 + type: string + piggy_table3 + TableScan + alias: piggy_table3 + Reduce Output Operator + key expressions: + expr: value + type: string + sort order: + + Map-reduce partition columns: + expr: value + type: string + tag: 1 + value expressions: + expr: value + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} + 1 {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col5, _col9 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col5 + type: string + expr: _col9 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-7 + Stats-Aggr Operator + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-6 + Stats-Aggr Operator + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select + piggy_table2.key, + piggy_table2.value, + piggy_table1.value, + piggy_table3.value +from + piggy_table2 +join + piggy_table1 +on + (piggy_table2.key = piggy_table1.key) +join + piggy_table3 +on + (piggy_table1.value = piggy_table3.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@piggy_table1 +PREHOOK: Input: default@piggy_table2 +PREHOOK: Input: default@piggy_table3 +PREHOOK: Output: file:/tmp/pbutler/hive_2010-11-30_14-09-10_935_5430344118908313154/-mr-10000 +POSTHOOK: query: select + piggy_table2.key, + piggy_table2.value, + piggy_table1.value, + piggy_table3.value +from + piggy_table2 +join + piggy_table1 +on + (piggy_table2.key = piggy_table1.key) +join + piggy_table3 +on + (piggy_table1.value = piggy_table3.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@piggy_table1 +POSTHOOK: 
Input: default@piggy_table2 +POSTHOOK: Input: default@piggy_table3 +POSTHOOK: Output: file:/tmp/pbutler/hive_2010-11-30_14-09-10_935_5430344118908313154/-mr-10000 +146 val_146 val_146 val_146 +146 val_146 val_146 val_146 +150 val_150 val_150 val_150 +213 val_213 val_213 val_213 +213 val_213 val_213 val_213 +238 val_238 val_238 val_238 +238 val_238 val_238 val_238 +255 val_255 val_255 val_255 +255 val_255 val_255 val_255 +273 val_273 val_273 val_273 +273 val_273 val_273 val_273 +273 val_273 val_273 val_273 +278 val_278 val_278 val_278 +278 val_278 val_278 val_278 +311 val_311 val_311 val_311 +311 val_311 val_311 val_311 +311 val_311 val_311 val_311 +401 val_401 val_401 val_401 +401 val_401 val_401 val_401 +401 val_401 val_401 val_401 +401 val_401 val_401 val_401 +401 val_401 val_401 val_401 +406 val_406 val_406 val_406 +406 val_406 val_406 val_406 +406 val_406 val_406 val_406 +406 val_406 val_406 val_406 +66 val_66 val_66 val_66 +98 val_98 val_98 val_98 +98 val_98 val_98 val_98 +PREHOOK: query: show table extended like piggy_table3 +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like piggy_table3 +POSTHOOK: type: SHOW_TABLESTATUS +tableName:piggy_table3 +owner:pbutler +location:pfile:/home/pbutler/hive-git/build/ql/test/data/warehouse/piggy_table3 +inputformat:org.apache.hadoop.mapred.TextInputFormat +outputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +columns:struct columns { string key, string value} +rows:25 +partitioned:false +partitionColumns: +totalNumberFiles:1 +totalFileSize:216 +maxFileSize:216 +minFileSize:216 +lastAccessTime:0 +lastUpdateTime:1291154943000 + +PREHOOK: query: drop table piggy_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table piggy_table +POSTHOOK: type: DROPTABLE Index: ql/src/test/results/clientpositive/piggyback_subq.q.out =================================================================== --- ql/src/test/results/clientpositive/piggyback_subq.q.out (revision 0) +++ ql/src/test/results/clientpositive/piggyback_subq.q.out (revision 0) @@ -0,0 +1,73 @@ +PREHOOK: query: explain select key from (select key from src1) subq +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from (select key from src1) subq +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF src1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq:src1 + TableScan + alias: src1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select key from (select key from src1) subq +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: file:/tmp/pbutler/hive_2010-12-02_13-16-18_115_1726118342393682081/-mr-10000 +POSTHOOK: query: select key from (select key from src1) subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: 
file:/tmp/pbutler/hive_2010-12-02_13-16-18_115_1726118342393682081/-mr-10000 +238 + +311 + + + +255 +278 +98 + + + +401 +150 +273 +224 +369 +66 +128 +213 +146 +406 + + + Index: ql/src/test/results/clientpositive/piggyback_gby.q.out =================================================================== --- ql/src/test/results/clientpositive/piggyback_gby.q.out (revision 0) +++ ql/src/test/results/clientpositive/piggyback_gby.q.out (revision 0) @@ -0,0 +1,429 @@ +PREHOOK: query: create table piggy_table1 as select * from src +PREHOOK: type: CREATETABLE +PREHOOK: Input: default@src +POSTHOOK: query: create table piggy_table1 as select * from src +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: default@src +POSTHOOK: Output: default@piggy_table1 +PREHOOK: query: explain select max(value) from piggy_table1 group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select max(value) from piggy_table1 group by key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF piggy_table1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + piggy_table1 + TableScan + alias: piggy_table1 + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: max(value) + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: string + Reduce Operator Tree: + Group By Operator + aggregations: + expr: max(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select max(value) from piggy_table1 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@piggy_table1 +PREHOOK: Output: file:/tmp/pbutler/hive_2010-11-30_14-19-35_039_5848861792516284918/-mr-10000 +POSTHOOK: query: select max(value) from piggy_table1 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@piggy_table1 +POSTHOOK: Output: file:/tmp/pbutler/hive_2010-11-30_14-19-35_039_5848861792516284918/-mr-10000 +val_0 +val_10 +val_100 +val_103 +val_104 +val_105 +val_11 +val_111 +val_113 +val_114 +val_116 +val_118 +val_119 +val_12 +val_120 +val_125 +val_126 +val_128 +val_129 +val_131 +val_133 +val_134 +val_136 +val_137 +val_138 +val_143 +val_145 +val_146 +val_149 +val_15 +val_150 +val_152 +val_153 +val_155 +val_156 +val_157 +val_158 +val_160 +val_162 +val_163 +val_164 +val_165 +val_166 +val_167 +val_168 +val_169 +val_17 +val_170 +val_172 +val_174 +val_175 +val_176 +val_177 +val_178 +val_179 +val_18 +val_180 +val_181 +val_183 +val_186 +val_187 +val_189 +val_19 +val_190 +val_191 +val_192 +val_193 +val_194 +val_195 
+val_196 +val_197 +val_199 +val_2 +val_20 +val_200 +val_201 +val_202 +val_203 +val_205 +val_207 +val_208 +val_209 +val_213 +val_214 +val_216 +val_217 +val_218 +val_219 +val_221 +val_222 +val_223 +val_224 +val_226 +val_228 +val_229 +val_230 +val_233 +val_235 +val_237 +val_238 +val_239 +val_24 +val_241 +val_242 +val_244 +val_247 +val_248 +val_249 +val_252 +val_255 +val_256 +val_257 +val_258 +val_26 +val_260 +val_262 +val_263 +val_265 +val_266 +val_27 +val_272 +val_273 +val_274 +val_275 +val_277 +val_278 +val_28 +val_280 +val_281 +val_282 +val_283 +val_284 +val_285 +val_286 +val_287 +val_288 +val_289 +val_291 +val_292 +val_296 +val_298 +val_30 +val_302 +val_305 +val_306 +val_307 +val_308 +val_309 +val_310 +val_311 +val_315 +val_316 +val_317 +val_318 +val_321 +val_322 +val_323 +val_325 +val_327 +val_33 +val_331 +val_332 +val_333 +val_335 +val_336 +val_338 +val_339 +val_34 +val_341 +val_342 +val_344 +val_345 +val_348 +val_35 +val_351 +val_353 +val_356 +val_360 +val_362 +val_364 +val_365 +val_366 +val_367 +val_368 +val_369 +val_37 +val_373 +val_374 +val_375 +val_377 +val_378 +val_379 +val_382 +val_384 +val_386 +val_389 +val_392 +val_393 +val_394 +val_395 +val_396 +val_397 +val_399 +val_4 +val_400 +val_401 +val_402 +val_403 +val_404 +val_406 +val_407 +val_409 +val_41 +val_411 +val_413 +val_414 +val_417 +val_418 +val_419 +val_42 +val_421 +val_424 +val_427 +val_429 +val_43 +val_430 +val_431 +val_432 +val_435 +val_436 +val_437 +val_438 +val_439 +val_44 +val_443 +val_444 +val_446 +val_448 +val_449 +val_452 +val_453 +val_454 +val_455 +val_457 +val_458 +val_459 +val_460 +val_462 +val_463 +val_466 +val_467 +val_468 +val_469 +val_47 +val_470 +val_472 +val_475 +val_477 +val_478 +val_479 +val_480 +val_481 +val_482 +val_483 +val_484 +val_485 +val_487 +val_489 +val_490 +val_491 +val_492 +val_493 +val_494 +val_495 +val_496 +val_497 +val_498 +val_5 +val_51 +val_53 +val_54 +val_57 +val_58 +val_64 +val_65 +val_66 +val_67 +val_69 +val_70 +val_72 +val_74 +val_76 +val_77 +val_78 +val_8 +val_80 +val_82 +val_83 +val_84 +val_85 +val_86 +val_87 +val_9 +val_90 +val_92 +val_95 +val_96 +val_97 +val_98 +PREHOOK: query: show table extended like piggy_table1 +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like piggy_table1 +POSTHOOK: type: SHOW_TABLESTATUS +tableName:piggy_table1 +owner:pbutler +location:pfile:/home/pbutler/hive-git/build/ql/test/data/warehouse/piggy_table1 +inputformat:org.apache.hadoop.mapred.TextInputFormat +outputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +columns:struct columns { string key, string value} +rows:500 +partitioned:false +partitionColumns: +totalNumberFiles:1 +totalFileSize:5812 +maxFileSize:5812 +minFileSize:5812 +lastAccessTime:0 +lastUpdateTime:1291155574000 + +PREHOOK: query: drop table piggy_table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@piggy_table1 +PREHOOK: Output: default@piggy_table1 +POSTHOOK: query: drop table piggy_table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@piggy_table1 +POSTHOOK: Output: default@piggy_table1 Index: ql/src/test/results/clientpositive/piggyback_union.q.out =================================================================== --- ql/src/test/results/clientpositive/piggyback_union.q.out (revision 0) +++ ql/src/test/results/clientpositive/piggyback_union.q.out (revision 0) @@ -0,0 +1,596 @@ +PREHOOK: query: explain select key from (select key from src1 union all select key from src) subq +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from (select key from src1 union all select key 
from src) subq +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF src1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)))))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:subq-subquery1:src1 + TableScan + alias: src1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + null-subquery2:subq-subquery2:src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select key from (select key from src1 union all select key from src) subq +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: file:/tmp/pbutler/hive_2010-11-18_16-51-04_007_3527823884216367843/-mr-10000 +POSTHOOK: query: select key from (select key from src1 union all select key from src) subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: file:/tmp/pbutler/hive_2010-11-18_16-51-04_007_3527823884216367843/-mr-10000 +238 + +311 + + + +255 +278 +98 + + + +401 +150 +273 +224 +369 +66 +128 +213 +146 +406 + + + +238 +86 +311 +27 +165 +409 +255 +278 +98 +484 +265 +193 +401 +150 +273 +224 +369 +66 +128 +213 +146 +406 +429 +374 +152 +469 +145 +495 +37 +327 +281 +277 +209 +15 +82 +403 +166 +417 +430 +252 +292 +219 +287 +153 +193 +338 +446 +459 +394 +237 +482 +174 +413 +494 +207 +199 +466 +208 +174 +399 +396 +247 +417 +489 +162 +377 +397 +309 +365 +266 +439 +342 +367 +325 +167 +195 +475 +17 +113 +155 +203 +339 +0 +455 +128 +311 +316 +57 +302 +205 +149 +438 +345 +129 +170 +20 +489 +157 +378 +221 +92 +111 +47 +72 +4 +280 +35 +427 +277 +208 +356 +399 +169 +382 +498 +125 +386 +437 +469 +192 +286 +187 +176 +54 +459 +51 +138 +103 +239 +213 +216 +430 +278 +176 +289 +221 +65 +318 +332 +311 +275 +137 +241 +83 +333 +180 +284 +12 +230 +181 +67 +260 +404 +384 +489 +353 +373 +272 +138 +217 +84 +348 +466 +58 +8 +411 +230 +208 +348 +24 +463 +431 +179 +172 +42 +129 +158 +119 +496 +0 +322 +197 +468 +393 +454 +100 +298 +199 +191 +418 +96 +26 +165 +327 +230 +205 +120 +131 +51 +404 +43 +436 +156 +469 +468 +308 +95 +196 +288 +481 +457 +98 +282 +197 +187 +318 +318 +409 +470 +137 +369 +316 +169 +413 +85 +77 +0 +490 +87 +364 +179 +118 +134 +395 +282 +138 +238 +419 +15 +118 +72 +90 +307 +19 +435 +10 +277 +273 +306 +224 +309 +389 +327 +242 +369 +392 +272 +331 +401 +242 +452 +177 +226 +5 +497 +402 +396 +317 +395 +58 +35 +336 
+95 +11 +168 +34 +229 +233 +143 +472 +322 +498 +160 +195 +42 +321 +430 +119 +489 +458 +78 +76 +41 +223 +492 +149 +449 +218 +228 +138 +453 +30 +209 +64 +468 +76 +74 +342 +69 +230 +33 +368 +103 +296 +113 +216 +367 +344 +167 +274 +219 +239 +485 +116 +223 +256 +263 +70 +487 +480 +401 +288 +191 +5 +244 +438 +128 +467 +432 +202 +316 +229 +469 +463 +280 +2 +35 +283 +331 +235 +80 +44 +193 +321 +335 +104 +466 +366 +175 +403 +483 +53 +105 +257 +406 +409 +190 +406 +401 +114 +258 +90 +203 +262 +348 +424 +12 +396 +201 +217 +164 +431 +454 +478 +298 +125 +431 +164 +424 +187 +382 +5 +70 +397 +480 +291 +24 +351 +255 +104 +70 +163 +438 +119 +414 +200 +491 +237 +439 +360 +248 +479 +305 +417 +199 +444 +120 +429 +169 +443 +323 +325 +277 +230 +478 +178 +468 +310 +317 +333 +493 +460 +207 +249 +265 +480 +83 +136 +353 +172 +214 +462 +233 +406 +133 +175 +189 +454 +375 +401 +421 +407 +384 +256 +26 +134 +67 +384 +379 +18 +462 +492 +100 +298 +9 +341 +498 +146 +458 +362 +186 +285 +348 +167 +18 +273 +183 +281 +344 +97 +469 +315 +84 +28 +37 +448 +152 +348 +307 +194 +414 +477 +222 +126 +90 +169 +403 +400 +200 +97 Index: ql/src/test/queries/clientpositive/piggyback_subq.q =================================================================== --- ql/src/test/queries/clientpositive/piggyback_subq.q (revision 0) +++ ql/src/test/queries/clientpositive/piggyback_subq.q (revision 0) @@ -0,0 +1,7 @@ + +set hive.stats.autogather.read=true; + +explain select key from (select key from src1) subq; + +select key from (select key from src1) subq; + Index: ql/src/test/queries/clientpositive/piggyback_limit.q =================================================================== --- ql/src/test/queries/clientpositive/piggyback_limit.q (revision 0) +++ ql/src/test/queries/clientpositive/piggyback_limit.q (revision 0) @@ -0,0 +1,11 @@ + +create table piggy_table1 as select * from src1; + +set hive.stats.autogather.read=true; + +explain select key from piggy_table1 limit 4; + +select key from piggy_table1 limit 4; + +drop table piggy_table; + Index: ql/src/test/queries/clientpositive/piggyback.q =================================================================== --- ql/src/test/queries/clientpositive/piggyback.q (revision 0) +++ ql/src/test/queries/clientpositive/piggyback.q (revision 0) @@ -0,0 +1,13 @@ + +create table piggy_table1 as select * from src; + +set hive.stats.autogather.read=true; + +explain select key from piggy_table1; + +select key from piggy_table1; + +show table extended like piggy_table1; + +drop table piggy_table1; + Index: ql/src/test/queries/clientpositive/piggyback_gby.q =================================================================== --- ql/src/test/queries/clientpositive/piggyback_gby.q (revision 0) +++ ql/src/test/queries/clientpositive/piggyback_gby.q (revision 0) @@ -0,0 +1,13 @@ + +create table piggy_table1 as select * from src; + +set hive.stats.autogather.read=true; + +explain select max(value) from piggy_table1 group by key; + +select max(value) from piggy_table1 group by key; + +show table extended like piggy_table1; + +drop table piggy_table1; + Index: ql/src/test/queries/clientpositive/piggyback_union.q =================================================================== --- ql/src/test/queries/clientpositive/piggyback_union.q (revision 0) +++ ql/src/test/queries/clientpositive/piggyback_union.q (revision 0) @@ -0,0 +1,7 @@ + +set hive.stats.autogather.read=true; + +explain select key from (select key from src1 union all select key from src) subq; + +select key from (select key from src1 union all select key 
from src) subq; + Index: ql/src/test/queries/clientpositive/piggyback_join.q =================================================================== --- ql/src/test/queries/clientpositive/piggyback_join.q (revision 0) +++ ql/src/test/queries/clientpositive/piggyback_join.q (revision 0) @@ -0,0 +1,71 @@ + +create table piggy_table1 as select * from src; +create table piggy_table2 as select * from src1; +create table piggy_table3 as select * from src1; + +set hive.stats.autogather.read=true; + +explain +select + piggy_table2.key, + piggy_table2.value, + piggy_table1.value +from + piggy_table2 +join + piggy_table1 +on + (piggy_table2.key = piggy_table1.key); + +select + piggy_table2.key, + piggy_table2.value, + piggy_table1.value +from + piggy_table2 +join + piggy_table1 +on + (piggy_table2.key = piggy_table1.key); + +show table extended like piggy_table1; + +show table extended like piggy_table2; + +explain +select + piggy_table2.key, + piggy_table2.value, + piggy_table1.value, + piggy_table3.value +from + piggy_table2 +join + piggy_table1 +on + (piggy_table2.key = piggy_table1.key) +join + piggy_table3 +on + (piggy_table1.value = piggy_table3.value); + +select + piggy_table2.key, + piggy_table2.value, + piggy_table1.value, + piggy_table3.value +from + piggy_table2 +join + piggy_table1 +on + (piggy_table2.key = piggy_table1.key) +join + piggy_table3 +on + (piggy_table1.value = piggy_table3.value); + +show table extended like piggy_table3; + +drop table piggy_table; + Index: ql/src/test/queries/clientpositive/piggyback_create.q =================================================================== --- ql/src/test/queries/clientpositive/piggyback_create.q (revision 0) +++ ql/src/test/queries/clientpositive/piggyback_create.q (revision 0) @@ -0,0 +1,15 @@ + +create table piggy_table1 as select * from src; + +set hive.stats.autogather.read=true; + +explain create table my_table as select key from piggy_table1; + +create table my_table as select key from piggy_table1; + +show table extended like piggy_table1; + +select * from my_table; + +drop table piggy_table; + Index: ql/src/test/queries/clientpositive/piggyback_part.q =================================================================== --- ql/src/test/queries/clientpositive/piggyback_part.q (revision 0) +++ ql/src/test/queries/clientpositive/piggyback_part.q (revision 0) @@ -0,0 +1,18 @@ + +CREATE TABLE piggy_part (key int, value string) PARTITIONED BY (ds string, hr int) STORED AS RCFILE; + +INSERT OVERWRITE TABLE piggy_part PARTITION (ds='2008-04-08', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 11; +INSERT OVERWRITE TABLE piggy_part PARTITION (ds='2008-04-08', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 12; +INSERT OVERWRITE TABLE piggy_part PARTITION (ds='2008-04-09', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 11; +INSERT OVERWRITE TABLE piggy_part PARTITION (ds='2008-04-09', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 12; + +set hive.stats.autogather.read=true; + +explain select key from piggy_part where ds = '2008-04-08'; + +select key from piggy_part where ds = '2008-04-08'; + +show table extended like piggy_part; + +drop table piggy_part; + Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java (revision 1044064) +++ 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java (working copy) @@ -222,7 +222,7 @@ if (prunedParts == null) { prunedParts = PartitionPruner.prune(tbl, pGraphContext .getOpToPartPruner().get(tso), pGraphContext.getConf(), alias, - pGraphContext.getPrunedPartitions()); + pGraphContext.getPrunedPartitions(), pGraphContext); pGraphContext.getOpToPartList().put(tso, prunedParts); } } catch (HiveException e) { Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrOpProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrOpProcFactory.java (revision 1044064) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrOpProcFactory.java (working copy) @@ -112,7 +112,7 @@ prunedPartList = PartitionPruner.prune(owc.getParseContext().getTopToTable().get(top), ppr_pred, owc.getParseContext().getConf(), (String) owc.getParseContext().getTopOps().keySet() - .toArray()[0], owc.getParseContext().getPrunedPartitions()); + .toArray()[0], owc.getParseContext().getPrunedPartitions(), owc.getParseContext()); if (prunedPartList != null) { owc.getParseContext().getOpToPartList().put(top, prunedPartList); } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (revision 1044064) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (working copy) @@ -76,6 +76,7 @@ ctx.setCurrAliasId(currAliasId); mapCurrCtx.put(op, new GenMapRedCtx(currTask, currTopOp, currAliasId)); + currWork.setGatheringStats(true); QBParseInfo parseInfo = parseCtx.getQB().getParseInfo(); if (parseInfo.isAnalyzeCommand()) { @@ -88,7 +89,6 @@ Task statsTask = TaskFactory.get(statsWork, parseCtx.getConf()); currTask.addDependentTask(statsTask); ctx.getRootTasks().add(currTask); - currWork.setGatheringStats(true); // NOTE: here we should use the new partition predicate pushdown API to get a list of pruned list, // and pass it to setTaskPlan as the last parameter Set confirmedPartns = new HashSet(); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (revision 1044064) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (working copy) @@ -50,6 +50,8 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec; /** * The transformation step that does partition pruning. 
@@ -151,7 +153,8 @@ */ public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr, HiveConf conf, String alias, - Map prunedPartitionsMap) throws HiveException { + Map prunedPartitionsMap, + ParseContext parseCtx) throws HiveException { LOG.trace("Started pruning partiton"); LOG.trace("tabname = " + tab.getTableName()); LOG.trace("prune Expression = " + prunerExpr); @@ -237,6 +240,13 @@ // Now return the set of partitions ret = new PrunedPartitionList(true_parts, unkn_parts, denied_parts); prunedPartitionsMap.put(key, ret); + + List partitions = new ArrayList(); + partitions.addAll(true_parts); + partitions.addAll(unkn_parts); + tableSpec ts = new tableSpec(tab, tab.getTableName(), partitions); + if (parseCtx != null) { parseCtx.setInputTableSpecs(alias, ts); } // guard: genMapRedTasks may call prune with a null parse context + return ret; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (revision 1044064) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (working copy) @@ -550,7 +550,7 @@ if (partsList == null) { partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp), parseCtx.getOpToPartPruner().get(topOp), opProcCtx.getConf(), - alias_id, parseCtx.getPrunedPartitions()); + alias_id, parseCtx.getPrunedPartitions(), parseCtx); parseCtx.getOpToPartList().put((TableScanOperator)topOp, partsList); } } catch (SemanticException e) { Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java (revision 1044064) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java (working copy) @@ -207,7 +207,7 @@ if (partsList == null) { partsList = PartitionPruner.prune(destTable, pGraphContext .getOpToPartPruner().get(ts), pGraphContext.getConf(), table, - pGraphContext.getPrunedPartitions()); + pGraphContext.getPrunedPartitions(), pGraphContext); pGraphContext.getOpToPartList().put(ts, partsList); } } catch (HiveException e) { Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java (revision 1044064) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java (working copy) @@ -204,7 +204,7 @@ prunedParts = pGraphContext.getOpToPartList().get(tso); if (prunedParts == null) { prunedParts = PartitionPruner.prune(tbl, pGraphContext.getOpToPartPruner().get(tso), pGraphContext.getConf(), alias, - pGraphContext.getPrunedPartitions()); + pGraphContext.getPrunedPartitions(), pGraphContext); pGraphContext.getOpToPartList().put(tso, prunedParts); } } catch (HiveException e) { Index: ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (revision 1044064) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (working copy) @@ -111,6 +111,7 @@ import org.apache.hadoop.hive.ql.plan.SwitchDatabaseDesc; import org.apache.hadoop.hive.ql.plan.UnlockTableDesc; import org.apache.hadoop.hive.ql.plan.api.StageType; +import org.apache.hadoop.hive.ql.stats.StatsSetupConst; import org.apache.hadoop.hive.serde.Constants; import org.apache.hadoop.hive.serde2.Deserializer; import
org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; @@ -1682,6 +1683,8 @@ outputFormattCls = tbl.getOutputFormatClass().getName(); } + String rowCount = tbl.getProperty(StatsSetupConst.ROW_COUNT); + String owner = tbl.getOwner(); List cols = tbl.getCols(); String ddlCols = MetaStoreUtils.getDDLFromFieldSchema("columns", cols); @@ -1704,6 +1707,10 @@ outStream.write(terminator); outStream.writeBytes("columns:" + ddlCols); outStream.write(terminator); + if (rowCount != null) { + outStream.writeBytes("rows:" + rowCount); + } + outStream.write(terminator); outStream.writeBytes("partitioned:" + isPartitioned); outStream.write(terminator); outStream.writeBytes("partitionColumns:" + partitionCols); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java (revision 1044064) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java (working copy) @@ -309,6 +309,10 @@ return destToLimit.get(dest); } + public HashMap getDestToLimit() { + return destToLimit; + } + /** * @return the outerQueryLimit */ Index: ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (revision 1044064) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (working copy) @@ -583,10 +583,19 @@ public static enum SpecType {TABLE_ONLY, STATIC_PARTITION, DYNAMIC_PARTITION}; public SpecType specType; + /* Constructor for a "dummy" tableSpec used for stats publishing */ + public tableSpec(Table tableHandle, String tableName, List partitions) { + this.tableName = tableName; + this.tableHandle = tableHandle; + this.partitions = partitions; + } + public tableSpec(Hive db, HiveConf conf, ASTNode ast) throws SemanticException { - assert (ast.getToken().getType() == HiveParser.TOK_TAB || ast.getToken().getType() == HiveParser.TOK_TABTYPE); + assert (ast.getToken().getType() == HiveParser.TOK_TAB + || ast.getToken().getType() == HiveParser.TOK_TABTYPE + || ast.getToken().getType() == HiveParser.TOK_TABREF) : ast.dump(); int childIndex = 0; numDynParts = 0; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1044064) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -28,6 +28,8 @@ import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; +import java.util.LinkedList; +import java.util.Queue; import java.util.Map; import java.util.Set; import java.util.TreeSet; @@ -41,8 +43,8 @@ import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.JavaUtils; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.MetaException; @@ -92,6 +94,7 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1; import org.apache.hadoop.hive.ql.optimizer.GenMROperator; +import 
org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext; import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1; import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink2; @@ -102,12 +105,12 @@ import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory; import org.apache.hadoop.hive.ql.optimizer.Optimizer; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalOptimizer; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec.SpecType; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec; import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.CreateTableDesc; import org.apache.hadoop.hive.ql.plan.CreateTableLikeDesc; @@ -122,6 +125,7 @@ import org.apache.hadoop.hive.ql.plan.ExtractDesc; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; +import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc; import org.apache.hadoop.hive.ql.plan.ForwardDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; @@ -140,27 +144,27 @@ import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.ScriptDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.apache.hadoop.hive.ql.plan.StatsWork; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.ql.plan.UDTFDesc; import org.apache.hadoop.hive.ql.plan.UnionDesc; -import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc; +import org.apache.hadoop.hive.ql.session.SessionState.ResourceType; import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.session.SessionState.ResourceType; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; import org.apache.hadoop.hive.serde.Constants; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -786,17 +790,44 @@ qb.getMetaData().setSrcForAlias(alias, tab); + tableSpec ts = null; if (qb.getParseInfo().isAnalyzeCommand()) { - tableSpec ts = new tableSpec(db, 
conf, (ASTNode) ast.getChild(0)); - if (ts.specType == SpecType.DYNAMIC_PARTITION) { // dynamic partitions - try { - ts.partitions = db.getPartitionsByNames(ts.tableHandle, ts.partSpec); - } catch (HiveException e) { - throw new SemanticException("Cannot get partitions for " + ts.partSpec, e); + ts = new tableSpec(db, conf, (ASTNode) ast.getChild(0)); + } else { + // we are piggybacking stats tracking on a TableScanOperator + if (ast.getToken().getType() == HiveParser.TOK_CREATETABLE) { + // CREATE TABLE statement + ts = new tableSpec(db, conf, (ASTNode) ast.getChild(2).getChild(0).getChild(0)); + } else if (ast.getToken().getType() == HiveParser.TOK_QUERY) { + // SELECT query + if (ast.getChild(0).getChild(0).getType() == HiveParser.TOK_SUBQUERY) { + // If we are selecting from a subquery, don't gather stats + continue; + } else if (ast.getChild(0).getChild(0).getType() == HiveParser.TOK_JOIN) { + // If this is a join, we have to figure out which branch of the AST + // is represented by the current value of `alias`. + if (ast.getChild(0).getChild(0).getChild(0).getChild(0).getText().equals(alias)) { + ts = new tableSpec(db, conf, (ASTNode) ast.getChild(0).getChild(0).getChild(0)); + } else { + ts = new tableSpec(db, conf, (ASTNode) ast.getChild(0).getChild(0).getChild(1)); + } + } else { + // Assume it's a regular SELECT query + ts = new tableSpec(db, conf, (ASTNode) ast.getChild(0).getChild(0)); } + } else { + // We should never get here + assert false; } - qb.getParseInfo().addTableSpec(alias, ts); } + if (ts.specType == SpecType.DYNAMIC_PARTITION) { // dynamic partitions + try { + ts.partitions = db.getPartitionsByNames(ts.tableHandle, ts.partSpec); + } catch (HiveException e) { + throw new SemanticException("Cannot get partitions for " + ts.partSpec, e); + } + } + qb.getParseInfo().addTableSpec(alias, ts); } LOG.info("Get metadata for subqueries"); @@ -5692,7 +5723,9 @@ // Create the root of the operator tree TableScanDesc tsDesc = new TableScanDesc(alias, vcList); - setupStats(tsDesc, qb.getParseInfo(), tab, alias); + if (qb.getParseInfo().getTableSpec(alias) != null) { + setupStats(tsDesc, qb.getParseInfo(), tab, alias); + } top = putOpInsertMap(OperatorFactory.get(tsDesc, new RowSchema(rwsch.getColumnInfos())), rwsch); @@ -5848,29 +5881,47 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String alias) throws SemanticException { - if (!qbp.isAnalyzeCommand()) { + if (qbp.isAnalyzeCommand()) { + tsDesc.setGatherStats(true); + } else if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHERREAD) && + qbp.getDestToLimit().isEmpty()) { + // if we are autogathering stats on read and this query is NOT limited, + // we gather stats on this TableScanOperator + // TODO: is getOuterQueryLimit the right method here? + tsDesc.setGatherStats(true); + } else { tsDesc.setGatherStats(false); - } else { - tsDesc.setGatherStats(true); + return; + } - String tblName = tab.getTableName(); - tableSpec tblSpec = qbp.getTableSpec(alias); - Map partSpec = tblSpec.getPartSpec(); + String tblName = tab.getTableName(); + tableSpec tblSpec = qbp.getTableSpec(alias); + Map partSpec = tblSpec.getPartSpec(); - if (partSpec != null) { - List cols = new ArrayList(); - cols.addAll(partSpec.keySet()); - tsDesc.setPartColumns(cols); - } + // Theoretically the key prefix could be any unique string shared + // between TableScanOperator (when publishing) and StatsTask (when aggregating).
+ // Here we use + // table_name + partitionSec + // as the prefix for easy of read during explain and debugging. + // Currently, partition spec can only be static partition. + String k = tblName + Path.SEPARATOR; + tsDesc.setStatsAggPrefix(k); - // Theoretically the key prefix could be any unique string shared - // between TableScanOperator (when publishing) and StatsTask (when aggregating). - // Here we use - // table_name + partitionSec - // as the prefix for easy of read during explain and debugging. - // Currently, partition spec can only be static partition. - String k = tblName + Path.SEPARATOR; - tsDesc.setStatsAggPrefix(k); + if (!qbp.isAnalyzeCommand()) { + // Get partition names from the table handle + List partitions = tab.getPartCols(); + List partNames = new ArrayList(); + for (FieldSchema fs : partitions) { + partNames.add(fs.getName()); + } + tsDesc.setPartColumns(partNames); + } else { + // Get partition names from the partition spec + if (partSpec != null) { + List cols = new ArrayList(); + cols.addAll(partSpec.keySet()); + tsDesc.setPartColumns(cols); + } // set up WritenEntity for replication outputs.add(new WriteEntity(tab)); @@ -6095,8 +6146,52 @@ } } + /** + * Add StatsTask to all MapRedTasks with a TableScanOperator in the given list + * of tasks. + * + * @param tasks a list of tasks + * @param conf a hive configuration object + * @param pctx the current parse context + */ + private void addStatsTask(List> rootTasks, HiveConf conf, ParseContext pctx) { + Queue> tasks = new LinkedList>(); + tasks.addAll(rootTasks); + while (!tasks.isEmpty()) { + Task task = tasks.remove(); + if (MapRedTask.class.isInstance(task)) { + MapRedTask mrTask = (MapRedTask) task; + MapredWork mrWork = (MapredWork) mrTask.getWork(); + + if (task.getChildTasks() != null) { + tasks.addAll(task.getChildTasks()); + } + LinkedHashMap> aliasToWork = mrWork.getAliasToWork(); + + for (String key : aliasToWork.keySet()) { + Queue> opsToProcess = new LinkedList>(); + Operator op = aliasToWork.get(key); + if (TableScanOperator.class.isInstance(op)) { + TableScanOperator tso = (TableScanOperator) op; + TableScanDesc tsd = (TableScanDesc) op.getConf(); + if (tsd != null) { + if (tsd.isGatherStats()) { + tableSpec ts = pctx.getInputTableSpecs(key); + StatsWork statsWork = new StatsWork(ts); + String k = key + Path.SEPARATOR; + statsWork.setAggKey(k); + Task statsTask = TaskFactory.get(statsWork, conf); + task.addDependentTask(statsTask); + } + } + } + } + } + } + } + @SuppressWarnings("nls") - private void genMapRedTasks(QB qb) throws SemanticException { + private void genMapRedTasks(QB qb, ParseContext pctx) throws SemanticException { FetchWork fetch = null; List> mvTask = new ArrayList>(); FetchTask fetchTask = null; @@ -6135,7 +6230,7 @@ if (partsList == null) { partsList = PartitionPruner.prune(topToTable.get(ts), opToPartPruner.get(ts), conf, (String) topOps.keySet() - .toArray()[0], prunedPartitions); + .toArray()[0], prunedPartitions, null); opToPartList.put(ts, partsList); } } catch (HiveException e) { @@ -6238,7 +6333,7 @@ GenMRProcContext procCtx = new GenMRProcContext( conf, new HashMap, Task>(), - new ArrayList>(), getParseContext(), + new ArrayList>(), pctx, mvTask, rootTasks, new LinkedHashMap, GenMapRedCtx>(), inputs, outputs); @@ -6279,6 +6374,11 @@ topNodes.addAll(topOps.values()); ogw.startWalking(topNodes, null); + if (!qb.getParseInfo().isAnalyzeCommand() && conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHERREAD)) { + // StatsTask for ANALYZE is added elsewhere + 
addStatsTask(rootTasks, conf, pctx); + } + // reduce sink does not have any kids - since the plan by now has been // broken up into multiple // tasks, iterate over all tasks. @@ -6556,7 +6656,7 @@ // At this point we have the complete operator tree // from which we want to find the reduce operator - genMapRedTasks(qb); + genMapRedTasks(qb, pCtx); LOG.info("Completed plan generation"); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (revision 1044064) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (working copy) @@ -41,6 +41,8 @@ import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec; +import org.apache.hadoop.hive.ql.metadata.Partition; /** * Parse Context: The current parse context. This is passed to the optimizer @@ -77,6 +79,17 @@ private Map> groupOpToInputTables; private Map prunedPartitions; + private Map aliasToInputTableSpecs; + + public void setInputTableSpecs(String key, tableSpec ts) { + aliasToInputTableSpecs.put(key, ts); + } + + public tableSpec getInputTableSpecs(String key) { + tableSpec ts = aliasToInputTableSpecs.get(key); + return ts; + } + /** * The lineage information. */ @@ -163,6 +176,7 @@ this.listMapJoinOpsNoReducer = listMapJoinOpsNoReducer; hasNonPartCols = false; this.groupOpToInputTables = new HashMap>(); + this.aliasToInputTableSpecs = new HashMap(); this.groupOpToInputTables = groupOpToInputTables; this.prunedPartitions = prunedPartitions; this.opToSamplePruner = opToSamplePruner;
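
Usage sketch (illustrative only, not part of the patch; the table name piggy_demo below is hypothetical): with hive.stats.autogather.read enabled, a scan that runs as a map-reduce job should piggyback a StatsTask on the query, and the gathered row count should then surface as the rows: field that the DDLTask change above adds to SHOW TABLE EXTENDED output, roughly as the piggyback*.q tests exercise:

set hive.stats.autogather.read=true;

-- CTAS scans src with the flag on, so the read of src is itself eligible for stats collection
create table piggy_demo as select * from src;

-- a plain projection runs as a map-reduce job, so row-count stats for piggy_demo should be gathered while it is read
select key from piggy_demo;

-- the piggybacked row count is expected to appear as a rows: line in the extended description
show table extended like piggy_demo;

drop table piggy_demo;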