Description
-- Reproduction fixture: a comma-delimited table with two string columns,
-- partitioned by the string column pt.
CREATE TABLE src3 (
    key   STRING,
    value STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Register three partitions, one statement per partition.
ALTER TABLE src3 ADD IF NOT EXISTS PARTITION (pt = '20110911000000');
ALTER TABLE src3 ADD IF NOT EXISTS PARTITION (pt = '20110912000000');
ALTER TABLE src3 ADD IF NOT EXISTS PARTITION (pt = '20110913000000');
-- Print the extended plan for a query that filters the subquery "sub" on
-- tag_student, a CASE expression that is 1 when value starts with 'aaa' or
-- 'vvv' and 0 otherwise.
EXPLAIN EXTENDED
SELECT user_id
FROM (
    SELECT
        CAST(key AS INT) AS user_id,
        CASE
            WHEN (value LIKE 'aaa%' OR value LIKE 'vvv%') THEN 1
            ELSE 0
        END AS tag_student
    FROM src3
) sub
WHERE sub.tag_student > 0;
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 is a root stage
STAGE PLANS:
Stage: Stage-1
Map Reduce
Alias -> Map Operator Tree:
sub:src3
TableScan
alias: src3
Filter Operator
isSamplingPred: false
predicate:
expr: (CASE WHEN (((value like 'aaa%') or (value like 'vvv%'))) THEN (1) ELSE (0) END > 0)
type: boolean
Select Operator
expressions:
expr: UDFToInteger(key)
type: int
expr: CASE WHEN (((value like 'aaa%') or (value like 'vvv%'))) THEN (1) ELSE (0) END
type: int
outputColumnNames: _col0, _col1
Filter Operator
isSamplingPred: false
predicate:
expr: (_col1 > 0)
type: boolean
Select Operator
expressions:
expr: _col0
type: int
outputColumnNames: _col0
File Output Operator
compressed: false
GlobalTableId: 0
directory: hdfs://localhost:54310/tmp/hive-tianzhao/hive_2011-10-11_19-26-12_894_9085644225727185586/-ext-10001
NumFilesPerFileSink: 1
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
columns _col0
columns.types int
serialization.format 1
TotalFiles: 1
MultiFileSpray: false
Needs Tagging: false
Stage: Stage-0
Fetch Operator
limit: -1
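If we run `set hive.optimize.ppd=false;` and explain the same query again, the plan no longer has the extra Filter Operator on the CASE expression directly under the TableScan; only the `(_col1 > 0)` filter remains: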
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 is a root stage
STAGE PLANS:
Stage: Stage-1
Map Reduce
Alias -> Map Operator Tree:
sub:src3
TableScan
alias: src3
Select Operator
expressions:
expr: UDFToInteger(key)
type: int
expr: CASE WHEN (((value like 'aaa%') or (value like 'vvv%'))) THEN (1) ELSE (0) END
type: int
outputColumnNames: _col0, _col1
Filter Operator
isSamplingPred: false
predicate:
expr: (_col1 > 0)
type: boolean
Select Operator
expressions:
expr: _col0
type: int
outputColumnNames: _col0
File Output Operator
compressed: false
GlobalTableId: 0
directory: hdfs://localhost:54310/tmp/hive-tianzhao/hive_2011-10-11_19-27-22_527_1729287213481398480/-ext-10001
NumFilesPerFileSink: 1
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
columns _col0
columns.types int
serialization.format 1
TotalFiles: 1
MultiFileSpray: false
Needs Tagging: false
Path -> Alias:
hdfs://localhost:54310/user/hive/warehouse/src3/pt=20110911000000 [sub:src3]
hdfs://localhost:54310/user/hive/warehouse/src3/pt=20110912000000 [sub:src3]
hdfs://localhost:54310/user/hive/warehouse/src3/pt=20110913000000 [sub:src3]
Path -> Partition:
hdfs://localhost:54310/user/hive/warehouse/src3/pt=20110911000000
Partition
base file name: pt=20110911000000