diff --git ql/src/test/results/clientpositive/auto_join14_hadoop20.q.out ql/src/test/results/clientpositive/auto_join14_hadoop20.q.out deleted file mode 100644 index 4f3e8f79e9..0000000000 --- ql/src/test/results/clientpositive/auto_join14_hadoop20.q.out +++ /dev/null @@ -1,123 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) - -CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) - -CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1 -PREHOOK: query: explain -FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 -INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value -PREHOOK: type: QUERY -POSTHOOK: query: explain -FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 -INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - src - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - src - TableScan - alias: src - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 100) and key is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 1002 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - condition expressions: - 0 - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 100) and key is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 2004 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 11 Data size: 1102 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col5 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1102 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 11 Data size: 1102 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-2 - Stats-Aggr Operator - -PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 -INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value -PREHOOK: type: QUERY -PREHOOK: 
Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@dest1 -POSTHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 -INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@dest1 -POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c2 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2)) FROM dest1 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2)) FROM dest1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1 -#### A masked pattern was here #### -404554174174 diff --git ql/src/test/results/clientpositive/combine2_hadoop20.q.out ql/src/test/results/clientpositive/combine2_hadoop20.q.out deleted file mode 100644 index 1d6024eaaa..0000000000 --- ql/src/test/results/clientpositive/combine2_hadoop20.q.out +++ /dev/null @@ -1,723 +0,0 @@ -PREHOOK: query: USE default -PREHOOK: type: SWITCHDATABASE -POSTHOOK: query: USE default -POSTHOOK: type: SWITCHDATABASE -PREHOOK: query: -- EXCLUDE_OS_WINDOWS --- excluded on windows because of difference in file name encoding logic - --- SORT_QUERY_RESULTS - -create table combine2(key string) partitioned by (value string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: -- EXCLUDE_OS_WINDOWS --- excluded on windows because of difference in file name encoding logic - --- SORT_QUERY_RESULTS - -create table combine2(key string) partitioned by (value string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@combine2 -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) --- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 --- in an attempt to force the generation of multiple splits and multiple output files. --- However, Hadoop 0.20 is incapable of generating splits smaller than the block size --- when using CombineFileInputFormat, so only one split is generated. This has a --- significant impact on the results results of this test. --- This issue was fixed in MAPREDUCE-2046 which is included in 0.22. - -insert overwrite table combine2 partition(value) -select * from ( - select key, value from src where key < 10 - union all - select key, '|' as value from src where key = 11 - union all - select key, '2010-04-21 09:45:00' value from src where key = 19) s -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@combine2 -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) --- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 --- in an attempt to force the generation of multiple splits and multiple output files. --- However, Hadoop 0.20 is incapable of generating splits smaller than the block size --- when using CombineFileInputFormat, so only one split is generated. This has a --- significant impact on the results results of this test. --- This issue was fixed in MAPREDUCE-2046 which is included in 0.22. 
- -insert overwrite table combine2 partition(value) -select * from ( - select key, value from src where key < 10 - union all - select key, '|' as value from src where key = 11 - union all - select key, '2010-04-21 09:45:00' value from src where key = 19) s -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@combine2@value=2010-04-21 09%3A45%3A00 -POSTHOOK: Output: default@combine2@value=val_0 -POSTHOOK: Output: default@combine2@value=val_2 -POSTHOOK: Output: default@combine2@value=val_4 -POSTHOOK: Output: default@combine2@value=val_5 -POSTHOOK: Output: default@combine2@value=val_8 -POSTHOOK: Output: default@combine2@value=val_9 -POSTHOOK: Output: default@combine2@value=| -POSTHOOK: Lineage: combine2 PARTITION(value=2010-04-21 09:45:00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: combine2 PARTITION(value=val_0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: combine2 PARTITION(value=val_2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: combine2 PARTITION(value=val_4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: combine2 PARTITION(value=val_5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: show partitions combine2 -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@combine2 -POSTHOOK: query: show partitions combine2 -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@combine2 -value=2010-04-21 09%3A45%3A00 -value=val_0 -value=val_2 -value=val_4 -value=val_5 -value=val_8 -value=val_9 -value=| -PREHOOK: query: explain -select key, value from combine2 where value is not null -PREHOOK: type: QUERY -POSTHOOK: query: explain -select key, value from combine2 where value is not null -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: combine2 - Statistics: Num rows: 12 Data size: 14 Basic stats: COMPLETE 
Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 14 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 14 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select key, value from combine2 where value is not null -PREHOOK: type: QUERY -PREHOOK: Input: default@combine2 -PREHOOK: Input: default@combine2@value=2010-04-21 09%3A45%3A00 -PREHOOK: Input: default@combine2@value=val_0 -PREHOOK: Input: default@combine2@value=val_2 -PREHOOK: Input: default@combine2@value=val_4 -PREHOOK: Input: default@combine2@value=val_5 -PREHOOK: Input: default@combine2@value=val_8 -PREHOOK: Input: default@combine2@value=val_9 -PREHOOK: Input: default@combine2@value=| -#### A masked pattern was here #### -POSTHOOK: query: select key, value from combine2 where value is not null -POSTHOOK: type: QUERY -POSTHOOK: Input: default@combine2 -POSTHOOK: Input: default@combine2@value=2010-04-21 09%3A45%3A00 -POSTHOOK: Input: default@combine2@value=val_0 -POSTHOOK: Input: default@combine2@value=val_2 -POSTHOOK: Input: default@combine2@value=val_4 -POSTHOOK: Input: default@combine2@value=val_5 -POSTHOOK: Input: default@combine2@value=val_8 -POSTHOOK: Input: default@combine2@value=val_9 -POSTHOOK: Input: default@combine2@value=| -#### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -11 | -19 2010-04-21 09:45:00 -2 val_2 -4 val_4 -5 val_5 -5 val_5 -5 val_5 -8 val_8 -9 val_9 -PREHOOK: query: explain extended -select count(1) from combine2 where value is not null -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select count(1) from combine2 where value is not null -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - combine2 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_WHERE - TOK_FUNCTION - TOK_ISNOTNULL - TOK_TABLE_OR_COL - value - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: combine2 - Statistics: Num rows: 12 Data size: 14 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - Statistics: Num rows: 12 Data size: 14 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: value=2010-04-21 09%3A45%3A00 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - value 2010-04-21 09:45:00 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key - columns.comments - 
columns.types string -#### A masked pattern was here #### - name default.combine2 - numFiles 1 - numRows 1 - partition_columns value - partition_columns.types string - rawDataSize 2 - serialization.ddl struct combine2 { string key} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 3 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key - columns.comments - columns.types string -#### A masked pattern was here #### - name default.combine2 - partition_columns value - partition_columns.types string - serialization.ddl struct combine2 { string key} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.combine2 - name: default.combine2 -#### A masked pattern was here #### - Partition - base file name: value=val_0 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - value val_0 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key - columns.comments - columns.types string -#### A masked pattern was here #### - name default.combine2 - numFiles 1 - numRows 3 - partition_columns value - partition_columns.types string - rawDataSize 3 - serialization.ddl struct combine2 { string key} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 6 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key - columns.comments - columns.types string -#### A masked pattern was here #### - name default.combine2 - partition_columns value - partition_columns.types string - serialization.ddl struct combine2 { string key} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.combine2 - name: default.combine2 -#### A masked pattern was here #### - Partition - base file name: value=val_2 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - value val_2 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key - columns.comments - columns.types string -#### A masked pattern was here #### - name default.combine2 - numFiles 1 - numRows 1 - partition_columns value - partition_columns.types string - rawDataSize 1 - serialization.ddl struct combine2 { string key} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key - columns.comments - columns.types string -#### A masked pattern was here #### - name 
default.combine2 - partition_columns value - partition_columns.types string - serialization.ddl struct combine2 { string key} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.combine2 - name: default.combine2 -#### A masked pattern was here #### - Partition - base file name: value=val_4 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - value val_4 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key - columns.comments - columns.types string -#### A masked pattern was here #### - name default.combine2 - numFiles 1 - numRows 1 - partition_columns value - partition_columns.types string - rawDataSize 1 - serialization.ddl struct combine2 { string key} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key - columns.comments - columns.types string -#### A masked pattern was here #### - name default.combine2 - partition_columns value - partition_columns.types string - serialization.ddl struct combine2 { string key} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.combine2 - name: default.combine2 -#### A masked pattern was here #### - Partition - base file name: value=val_5 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - value val_5 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key - columns.comments - columns.types string -#### A masked pattern was here #### - name default.combine2 - numFiles 1 - numRows 3 - partition_columns value - partition_columns.types string - rawDataSize 3 - serialization.ddl struct combine2 { string key} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 6 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key - columns.comments - columns.types string -#### A masked pattern was here #### - name default.combine2 - partition_columns value - partition_columns.types string - serialization.ddl struct combine2 { string key} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.combine2 - name: default.combine2 -#### A masked pattern was here #### - Partition - base file name: value=val_8 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - value val_8 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key - columns.comments - 
columns.types string -#### A masked pattern was here #### - name default.combine2 - numFiles 1 - numRows 1 - partition_columns value - partition_columns.types string - rawDataSize 1 - serialization.ddl struct combine2 { string key} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key - columns.comments - columns.types string -#### A masked pattern was here #### - name default.combine2 - partition_columns value - partition_columns.types string - serialization.ddl struct combine2 { string key} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.combine2 - name: default.combine2 -#### A masked pattern was here #### - Partition - base file name: value=val_9 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - value val_9 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key - columns.comments - columns.types string -#### A masked pattern was here #### - name default.combine2 - numFiles 1 - numRows 1 - partition_columns value - partition_columns.types string - rawDataSize 1 - serialization.ddl struct combine2 { string key} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key - columns.comments - columns.types string -#### A masked pattern was here #### - name default.combine2 - partition_columns value - partition_columns.types string - serialization.ddl struct combine2 { string key} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.combine2 - name: default.combine2 -#### A masked pattern was here #### - Partition - base file name: value=| - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - value | - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key - columns.comments - columns.types string -#### A masked pattern was here #### - name default.combine2 - numFiles 1 - numRows 1 - partition_columns value - partition_columns.types string - rawDataSize 2 - serialization.ddl struct combine2 { string key} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 3 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key - columns.comments - columns.types string -#### A masked pattern was here #### - name default.combine2 - 
partition_columns value - partition_columns.types string - serialization.ddl struct combine2 { string key} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.combine2 - name: default.combine2 - Truncated Path -> Alias: - /combine2/value=2010-04-21 09%3A45%3A00 [combine2] - /combine2/value=val_0 [combine2] - /combine2/value=val_2 [combine2] - /combine2/value=val_4 [combine2] - /combine2/value=val_5 [combine2] - /combine2/value=val_8 [combine2] - /combine2/value=val_9 [combine2] - /combine2/value=| [combine2] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(1) from combine2 where value is not null -PREHOOK: type: QUERY -PREHOOK: Input: default@combine2 -PREHOOK: Input: default@combine2@value=2010-04-21 09%3A45%3A00 -PREHOOK: Input: default@combine2@value=val_0 -PREHOOK: Input: default@combine2@value=val_2 -PREHOOK: Input: default@combine2@value=val_4 -PREHOOK: Input: default@combine2@value=val_5 -PREHOOK: Input: default@combine2@value=val_8 -PREHOOK: Input: default@combine2@value=val_9 -PREHOOK: Input: default@combine2@value=| -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from combine2 where value is not null -POSTHOOK: type: QUERY -POSTHOOK: Input: default@combine2 -POSTHOOK: Input: default@combine2@value=2010-04-21 09%3A45%3A00 -POSTHOOK: Input: default@combine2@value=val_0 -POSTHOOK: Input: default@combine2@value=val_2 -POSTHOOK: Input: default@combine2@value=val_4 -POSTHOOK: Input: default@combine2@value=val_5 -POSTHOOK: Input: default@combine2@value=val_8 -POSTHOOK: Input: default@combine2@value=val_9 -POSTHOOK: Input: default@combine2@value=| -#### A masked pattern was here #### -12 -PREHOOK: query: explain -select ds, count(1) from srcpart where ds is not null group by ds -PREHOOK: type: QUERY -POSTHOOK: query: explain -select ds, count(1) from srcpart where ds is not null group by ds -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: ds (type: string) - 
outputColumnNames: ds - Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - keys: ds (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select ds, count(1) from srcpart where ds is not null group by ds -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select ds, count(1) from srcpart where ds is not null group by ds -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -#### A masked pattern was here #### -2008-04-08 1000 -2008-04-09 1000 diff --git ql/src/test/results/clientpositive/druid_basic1.q.out ql/src/test/results/clientpositive/druid_basic1.q.out deleted file mode 100644 index a5c0687620..0000000000 --- ql/src/test/results/clientpositive/druid_basic1.q.out +++ /dev/null @@ -1,140 +0,0 @@ -PREHOOK: query: CREATE EXTERNAL TABLE druid_table_1 -STORED BY 'org.apache.hadoop.hive.druid.QTestDruidStorageHandler' -TBLPROPERTIES ("druid.datasource" = "wikipedia") -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@druid_table_1 -POSTHOOK: query: CREATE EXTERNAL TABLE druid_table_1 -STORED BY 'org.apache.hadoop.hive.druid.QTestDruidStorageHandler' -TBLPROPERTIES ("druid.datasource" = "wikipedia") -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@druid_table_1 -PREHOOK: query: DESCRIBE FORMATTED druid_table_1 -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@druid_table_1 -POSTHOOK: query: DESCRIBE FORMATTED druid_table_1 -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@druid_table_1 -# col_name data_type comment -__time timestamp with local time zone from deserializer -robot string from deserializer -namespace string from deserializer -anonymous string from deserializer -unpatrolled string from deserializer -page string from deserializer -language 
string from deserializer -newpage string from deserializer -user string from deserializer -count float from deserializer -added float from deserializer -delta float from deserializer -variation float from deserializer -deleted float from deserializer - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: EXTERNAL_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"__time\":\"true\",\"added\":\"true\",\"anonymous\":\"true\",\"count\":\"true\",\"deleted\":\"true\",\"delta\":\"true\",\"language\":\"true\",\"namespace\":\"true\",\"newpage\":\"true\",\"page\":\"true\",\"robot\":\"true\",\"unpatrolled\":\"true\",\"user\":\"true\",\"variation\":\"true\"}} - EXTERNAL TRUE - bucketing_version 2 - druid.datasource wikipedia - numFiles 0 - numRows 0 - rawDataSize 0 - storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler - totalSize 0 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.druid.QTestDruidSerDe -InputFormat: null -OutputFormat: null -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: CREATE EXTERNAL TABLE druid_table_2 -STORED BY 'org.apache.hadoop.hive.druid.QTestDruidStorageHandler' -TBLPROPERTIES ("druid.datasource" = "wikipedia") -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@druid_table_2 -POSTHOOK: query: CREATE EXTERNAL TABLE druid_table_2 -STORED BY 'org.apache.hadoop.hive.druid.QTestDruidStorageHandler' -TBLPROPERTIES ("druid.datasource" = "wikipedia") -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@druid_table_2 -PREHOOK: query: DESCRIBE FORMATTED druid_table_2 -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@druid_table_2 -POSTHOOK: query: DESCRIBE FORMATTED druid_table_2 -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@druid_table_2 -# col_name data_type comment -__time timestamp with local time zone from deserializer -robot string from deserializer -namespace string from deserializer -anonymous string from deserializer -unpatrolled string from deserializer -page string from deserializer -language string from deserializer -newpage string from deserializer -user string from deserializer -count float from deserializer -added float from deserializer -delta float from deserializer -variation float from deserializer -deleted float from deserializer - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: EXTERNAL_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"__time\":\"true\",\"added\":\"true\",\"anonymous\":\"true\",\"count\":\"true\",\"deleted\":\"true\",\"delta\":\"true\",\"language\":\"true\",\"namespace\":\"true\",\"newpage\":\"true\",\"page\":\"true\",\"robot\":\"true\",\"unpatrolled\":\"true\",\"user\":\"true\",\"variation\":\"true\"}} - EXTERNAL TRUE - bucketing_version 2 - druid.datasource wikipedia - numFiles 0 - numRows 0 - rawDataSize 0 - storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler - totalSize 0 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.druid.QTestDruidSerDe -InputFormat: null -OutputFormat: null -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] 
-Storage Desc Params: - serialization.format 1 -PREHOOK: query: DROP TABLE druid_table_2 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@druid_table_2 -PREHOOK: Output: default@druid_table_2 -POSTHOOK: query: DROP TABLE druid_table_2 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@druid_table_2 -POSTHOOK: Output: default@druid_table_2 -PREHOOK: query: DROP TABLE druid_table_1 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@druid_table_1 -PREHOOK: Output: default@druid_table_1 -POSTHOOK: query: DROP TABLE druid_table_1 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@druid_table_1 -POSTHOOK: Output: default@druid_table_1 diff --git ql/src/test/results/clientpositive/druid_basic3.q.out ql/src/test/results/clientpositive/druid_basic3.q.out deleted file mode 100644 index 54719f7517..0000000000 --- ql/src/test/results/clientpositive/druid_basic3.q.out +++ /dev/null @@ -1,476 +0,0 @@ -PREHOOK: query: CREATE EXTERNAL TABLE druid_table_1_n4 -STORED BY 'org.apache.hadoop.hive.druid.QTestDruidStorageHandler' -TBLPROPERTIES ("druid.datasource" = "wikipedia") -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@druid_table_1_n4 -POSTHOOK: query: CREATE EXTERNAL TABLE druid_table_1_n4 -STORED BY 'org.apache.hadoop.hive.druid.QTestDruidStorageHandler' -TBLPROPERTIES ("druid.datasource" = "wikipedia") -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@druid_table_1_n4 -PREHOOK: query: EXPLAIN -SELECT sum(added) + sum(delta) as a, language -FROM druid_table_1_n4 -GROUP BY language -ORDER BY a DESC -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT sum(added) + sum(delta) as a, language -FROM druid_table_1_n4 -GROUP BY language -ORDER BY a DESC -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n4 - properties: - druid.fieldNames a,language - druid.fieldTypes double,string - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"a","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f1\" + \"$f2\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: a (type: double), language (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT sum(delta), sum(added) + sum(delta) AS a, language -FROM druid_table_1_n4 -GROUP BY language -ORDER BY a DESC -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT sum(delta), sum(added) + sum(delta) AS a, language -FROM druid_table_1_n4 -GROUP BY language -ORDER BY a DESC -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n4 - properties: - druid.fieldNames $f1,a,language - druid.fieldTypes double,double,string - druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"a","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"delta"},{"type":"doubleSum","name":"$f2","fieldName":"added"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f2\" + \"$f1\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: $f1 (type: double), a (type: double), language (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT language, sum(added) / sum(delta) AS a -FROM druid_table_1_n4 -GROUP BY language -ORDER BY a DESC -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT language, sum(added) / sum(delta) AS a -FROM druid_table_1_n4 -GROUP BY language -ORDER BY a DESC -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n4 - properties: - druid.fieldNames language,a - druid.fieldTypes string,double - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"a","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f1\" / \"$f2\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: language (type: string), a (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT language, sum(added) * sum(delta) AS a -FROM druid_table_1_n4 -GROUP BY language -ORDER BY a DESC -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT language, sum(added) * sum(delta) AS a -FROM druid_table_1_n4 -GROUP BY language -ORDER BY a DESC -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n4 - properties: - druid.fieldNames language,a - druid.fieldTypes string,double - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"a","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f1\" * \"$f2\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: NONE - Select Operator - expressions: language (type: string), a (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT language, sum(added) - sum(delta) AS a -FROM druid_table_1_n4 -GROUP BY language -ORDER BY a DESC -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT language, sum(added) - sum(delta) AS a -FROM druid_table_1_n4 -GROUP BY language -ORDER BY a DESC -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n4 - properties: - druid.fieldNames language,a - druid.fieldTypes string,double - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"a","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f1\" - \"$f2\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: language (type: string), a (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT language, sum(added) + 100 AS a -FROM druid_table_1_n4 -GROUP BY language -ORDER BY a DESC -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT language, sum(added) + 100 AS a -FROM druid_table_1_n4 -GROUP BY language -ORDER BY a DESC -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n4 - properties: - druid.fieldNames language,a - druid.fieldTypes string,double - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"a","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f1\" + CAST(100, 'DOUBLE'))"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: language (type: string), a (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT language, -1 * (a + b) AS c -FROM ( - SELECT (sum(added)-sum(delta)) / (count(*) * 3) AS a, sum(deleted) AS b, language - FROM druid_table_1_n4 - GROUP BY language) subq -ORDER BY c DESC -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT language, -1 * (a + b) AS c -FROM ( - SELECT (sum(added)-sum(delta)) / (count(*) * 3) AS a, sum(deleted) AS b, language - FROM druid_table_1_n4 - GROUP BY language) subq -ORDER BY c DESC -POSTHOOK: type: QUERY -STAGE 
DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n4 - properties: - druid.fieldNames language,c - druid.fieldTypes string,double - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"c","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"},{"type":"count","name":"$f3"},{"type":"doubleSum","name":"$f4","fieldName":"deleted"}],"postAggregations":[{"type":"expression","name":"c","expression":"(-1.0 * (((\"$f1\" - \"$f2\") / CAST((\"$f3\" * 3), 'DOUBLE')) + \"$f4\"))"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: language (type: string), c (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT language, robot, sum(added) - sum(delta) AS a -FROM druid_table_1_n4 -WHERE extract (week from `__time`) IN (10,11) -GROUP BY language, robot -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT language, robot, sum(added) - sum(delta) AS a -FROM druid_table_1_n4 -WHERE extract (week from `__time`) IN (10,11) -GROUP BY language, robot -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n4 - properties: - druid.fieldNames language,robot,a - druid.fieldTypes string,string,double - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default"},"filter":{"type":"in","dimension":"__time","values":["10","11"],"extractionFn":{"type":"timeFormat","format":"w","timeZone":"US/Pacific","locale":"en-US"}},"aggregations":[{"type":"doubleSum","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"delta"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f2\" - \"$f3\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: language (type: string), robot (type: string), a (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT language, sum(delta) / count(*) AS a -FROM druid_table_1_n4 -GROUP BY language -ORDER BY a DESC -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT language, sum(delta) / count(*) AS a -FROM druid_table_1_n4 -GROUP BY language -ORDER BY a DESC -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n4 - properties: - druid.fieldNames language,a - druid.fieldTypes string,double - 
druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"a","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"delta"},{"type":"count","name":"$f2"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f1\" / CAST(\"$f2\", 'DOUBLE'))"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: language (type: string), a (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT language, sum(added) / sum(delta) AS a, - CASE WHEN sum(deleted)=0 THEN 1.0 ELSE sum(deleted) END AS b -FROM druid_table_1_n4 -GROUP BY language -ORDER BY a DESC -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT language, sum(added) / sum(delta) AS a, - CASE WHEN sum(deleted)=0 THEN 1.0 ELSE sum(deleted) END AS b -FROM druid_table_1_n4 -GROUP BY language -ORDER BY a DESC -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n4 - properties: - druid.fieldNames language,a,b - druid.fieldTypes string,double,double - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"a","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"},{"type":"doubleSum","name":"$f3","fieldName":"deleted"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f1\" / \"$f2\")"},{"type":"expression","name":"b","expression":"case_searched((\"$f3\" == 0.0),1,\"$f3\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: language (type: string), a (type: double), b (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT language, a, a - b as c -FROM ( - SELECT language, sum(added) + 100 AS a, sum(delta) AS b - FROM druid_table_1_n4 - GROUP BY language) subq -ORDER BY a DESC -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT language, a, a - b as c -FROM ( - SELECT language, sum(added) + 100 AS a, sum(delta) AS b - FROM druid_table_1_n4 - GROUP BY language) subq -ORDER BY a DESC -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n4 - properties: - druid.fieldNames language,a,c - druid.fieldTypes string,double,double - druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"a","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f1\" + CAST(100, 'DOUBLE'))"},{"type":"expression","name":"c","expression":"((\"$f1\" + CAST(100, 'DOUBLE')) - \"$f2\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: language (type: string), a (type: double), c (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT language, robot, "A" -FROM ( - SELECT sum(added) - sum(delta) AS a, language, robot - FROM druid_table_1_n4 - GROUP BY language, robot ) subq -ORDER BY "A" -LIMIT 5 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT language, robot, "A" -FROM ( - SELECT sum(added) - sum(delta) AS a, language, robot - FROM druid_table_1_n4 - GROUP BY language, robot ) subq -ORDER BY "A" -LIMIT 5 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n4 - properties: - druid.fieldNames robot,language - druid.fieldTypes string,string - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","limit":5,"columns":[]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: language (type: string), robot (type: string), 'A' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT language, robot, "A" -FROM ( - SELECT language, sum(added) + sum(delta) AS a, robot - FROM druid_table_1_n4 - GROUP BY language, robot) subq -ORDER BY robot, language -LIMIT 5 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT language, robot, "A" -FROM ( - SELECT language, sum(added) + sum(delta) AS a, robot - FROM druid_table_1_n4 - GROUP BY language, robot) subq -ORDER BY robot, language -LIMIT 5 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n4 - properties: - druid.fieldNames robot,language - druid.fieldTypes string,string - druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","limit":5,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"lexicographic"},{"dimension":"language","direction":"ascending","dimensionOrder":"lexicographic"}]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: language (type: string), robot (type: string), 'A' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - diff --git ql/src/test/results/clientpositive/druid_intervals.q.out ql/src/test/results/clientpositive/druid_intervals.q.out deleted file mode 100644 index 715623ad61..0000000000 --- ql/src/test/results/clientpositive/druid_intervals.q.out +++ /dev/null @@ -1,386 +0,0 @@ -PREHOOK: query: CREATE EXTERNAL TABLE druid_table_1_n0 -STORED BY 'org.apache.hadoop.hive.druid.QTestDruidStorageHandler' -TBLPROPERTIES ("druid.datasource" = "wikipedia") -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@druid_table_1_n0 -POSTHOOK: query: CREATE EXTERNAL TABLE druid_table_1_n0 -STORED BY 'org.apache.hadoop.hive.druid.QTestDruidStorageHandler' -TBLPROPERTIES ("druid.datasource" = "wikipedia") -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@druid_table_1_n0 -PREHOOK: query: DESCRIBE FORMATTED druid_table_1_n0 -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@druid_table_1_n0 -POSTHOOK: query: DESCRIBE FORMATTED druid_table_1_n0 -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@druid_table_1_n0 -# col_name data_type comment -__time timestamp with local time zone from deserializer -robot string from deserializer -namespace string from deserializer -anonymous string from deserializer -unpatrolled string from deserializer -page string from deserializer -language string from deserializer -newpage string from deserializer -user string from deserializer -count float from deserializer -added float from deserializer -delta float from deserializer -variation float from deserializer -deleted float from deserializer - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: EXTERNAL_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"__time\":\"true\",\"added\":\"true\",\"anonymous\":\"true\",\"count\":\"true\",\"deleted\":\"true\",\"delta\":\"true\",\"language\":\"true\",\"namespace\":\"true\",\"newpage\":\"true\",\"page\":\"true\",\"robot\":\"true\",\"unpatrolled\":\"true\",\"user\":\"true\",\"variation\":\"true\"}} - EXTERNAL TRUE - bucketing_version 2 - druid.datasource wikipedia - numFiles 0 - numRows 0 - rawDataSize 0 - storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler - totalSize 0 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.druid.QTestDruidSerDe -InputFormat: null -OutputFormat: null -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: EXPLAIN -SELECT `__time` 
-FROM druid_table_1_n0 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT `__time` -FROM druid_table_1_n0 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n0 - properties: - druid.fieldNames vc - druid.fieldTypes timestamp with local time zone - druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} - druid.query.type scan - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: vc (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT `__time` -FROM druid_table_1_n0 -WHERE `__time` < '2012-03-01 00:00:00' -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT `__time` -FROM druid_table_1_n0 -WHERE `__time` < '2012-03-01 00:00:00' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n0 - properties: - druid.fieldNames vc - druid.fieldTypes timestamp with local time zone - druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/2012-03-01T08:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} - druid.query.type scan - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: vc (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT `__time` -FROM druid_table_1_n0 -WHERE `__time` >= '2010-01-01 00:00:00' AND `__time` <= '2012-03-01 00:00:00' -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT `__time` -FROM druid_table_1_n0 -WHERE `__time` >= '2010-01-01 00:00:00' AND `__time` <= '2012-03-01 00:00:00' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n0 - properties: - druid.fieldNames vc - druid.fieldTypes timestamp with local time zone - druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["2010-01-01T08:00:00.000Z/2012-03-01T08:00:00.001Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} - druid.query.type scan - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: vc (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT `__time` -FROM druid_table_1_n0 -WHERE `__time` >= '2010-01-01 00:00:00' AND `__time` <= '2012-03-01 00:00:00' - AND `__time` < '2011-01-01 00:00:00' -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT `__time` -FROM druid_table_1_n0 -WHERE `__time` >= '2010-01-01 00:00:00' AND `__time` <= '2012-03-01 00:00:00' - 
AND `__time` < '2011-01-01 00:00:00' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n0 - properties: - druid.fieldNames vc - druid.fieldTypes timestamp with local time zone - druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["2010-01-01T08:00:00.000Z/2011-01-01T08:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} - druid.query.type scan - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: vc (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT `__time` -FROM druid_table_1_n0 -WHERE `__time` BETWEEN '2010-01-01 00:00:00' AND '2011-01-01 00:00:00' -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT `__time` -FROM druid_table_1_n0 -WHERE `__time` BETWEEN '2010-01-01 00:00:00' AND '2011-01-01 00:00:00' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n0 - properties: - druid.fieldNames vc - druid.fieldTypes timestamp with local time zone - druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["2010-01-01T08:00:00.000Z/2011-01-01T08:00:00.001Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} - druid.query.type scan - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: vc (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT `__time` -FROM druid_table_1_n0 -WHERE (`__time` BETWEEN '2010-01-01 00:00:00' AND '2011-01-01 00:00:00') - OR (`__time` BETWEEN '2012-01-01 00:00:00' AND '2013-01-01 00:00:00') -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT `__time` -FROM druid_table_1_n0 -WHERE (`__time` BETWEEN '2010-01-01 00:00:00' AND '2011-01-01 00:00:00') - OR (`__time` BETWEEN '2012-01-01 00:00:00' AND '2013-01-01 00:00:00') -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n0 - properties: - druid.fieldNames vc - druid.fieldTypes timestamp with local time zone - druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["2010-01-01T08:00:00.000Z/2011-01-01T08:00:00.001Z","2012-01-01T08:00:00.000Z/2013-01-01T08:00:00.001Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} - druid.query.type scan - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: vc (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT `__time` -FROM druid_table_1_n0 -WHERE (`__time` BETWEEN '2010-01-01 00:00:00' AND '2011-01-01 00:00:00') - OR (`__time` BETWEEN '2010-06-01 
00:00:00' AND '2012-01-01 00:00:00') -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT `__time` -FROM druid_table_1_n0 -WHERE (`__time` BETWEEN '2010-01-01 00:00:00' AND '2011-01-01 00:00:00') - OR (`__time` BETWEEN '2010-06-01 00:00:00' AND '2012-01-01 00:00:00') -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n0 - properties: - druid.fieldNames vc - druid.fieldTypes timestamp with local time zone - druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["2010-01-01T08:00:00.000Z/2012-01-01T08:00:00.001Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} - druid.query.type scan - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: vc (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT `__time` -FROM druid_table_1_n0 -WHERE `__time` IN ('2010-01-01 00:00:00','2011-01-01 00:00:00') -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT `__time` -FROM druid_table_1_n0 -WHERE `__time` IN ('2010-01-01 00:00:00','2011-01-01 00:00:00') -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n0 - properties: - druid.fieldNames vc - druid.fieldTypes timestamp with local time zone - druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["2010-01-01T08:00:00.000Z/2010-01-01T08:00:00.001Z","2011-01-01T08:00:00.000Z/2011-01-01T08:00:00.001Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} - druid.query.type scan - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: vc (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT `__time`, robot -FROM druid_table_1_n0 -WHERE robot = 'user1' AND `__time` IN ('2010-01-01 00:00:00','2011-01-01 00:00:00') -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT `__time`, robot -FROM druid_table_1_n0 -WHERE robot = 'user1' AND `__time` IN ('2010-01-01 00:00:00','2011-01-01 00:00:00') -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n0 - properties: - druid.fieldNames vc,vc0 - druid.fieldTypes timestamp with local time zone,string - druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["2010-01-01T08:00:00.000Z/2010-01-01T08:00:00.001Z","2011-01-01T08:00:00.000Z/2011-01-01T08:00:00.001Z"],"filter":{"type":"selector","dimension":"robot","value":"user1"},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"'user1'","outputType":"STRING"}],"columns":["vc","vc0"],"resultFormat":"compactedList"} - druid.query.type scan - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: vc (type: 
timestamp with local time zone), vc0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - -PREHOOK: query: EXPLAIN -SELECT `__time`, robot -FROM druid_table_1_n0 -WHERE robot = 'user1' OR `__time` IN ('2010-01-01 00:00:00','2011-01-01 00:00:00') -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT `__time`, robot -FROM druid_table_1_n0 -WHERE robot = 'user1' OR `__time` IN ('2010-01-01 00:00:00','2011-01-01 00:00:00') -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: druid_table_1_n0 - properties: - druid.fieldNames vc,robot - druid.fieldTypes timestamp with local time zone,string - druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"or","fields":[{"type":"selector","dimension":"robot","value":"user1"},{"type":"selector","dimension":"__time","value":"2010-01-01T08:00:00.000Z","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"selector","dimension":"__time","value":"2011-01-01T08:00:00.000Z","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","robot"],"resultFormat":"compactedList"} - druid.query.type scan - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: vc (type: timestamp with local time zone), robot (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink - diff --git ql/src/test/results/clientpositive/fullouter_mapjoin_1.q.out ql/src/test/results/clientpositive/fullouter_mapjoin_1.q.out deleted file mode 100644 index 0ae9df92f2..0000000000 --- ql/src/test/results/clientpositive/fullouter_mapjoin_1.q.out +++ /dev/null @@ -1,176 +0,0 @@ -PREHOOK: query: CREATE TABLE fullouter_long_big_1a(key bigint) -row format delimited fields terminated by ',' -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@fullouter_long_big_1a -POSTHOOK: query: CREATE TABLE fullouter_long_big_1a(key bigint) -row format delimited fields terminated by ',' -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@fullouter_long_big_1a -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@fullouter_long_big_1a -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@fullouter_long_big_1a -PREHOOK: query: CREATE TABLE fullouter_long_small_1a(key bigint, s_date date) -row format delimited fields terminated by ',' -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@fullouter_long_small_1a -POSTHOOK: query: CREATE TABLE fullouter_long_small_1a(key bigint, s_date date) -row format delimited fields terminated by ',' -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@fullouter_long_small_1a -PREHOOK: query: LOAD DATA 
LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@fullouter_long_small_1a -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@fullouter_long_small_1a -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: s - Statistics: Num rows: 1 Data size: 1640 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: bigint), s_date (type: date) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1640 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 1640 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: date) - Reduce Operator Tree: - Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key -PREHOOK: type: QUERY -PREHOOK: Input: default@fullouter_long_big_1a -PREHOOK: Input: default@fullouter_long_small_1a -#### A masked pattern was here #### -POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@fullouter_long_big_1a -POSTHOOK: Input: default@fullouter_long_small_1a -#### A masked pattern was here #### --5206670856103795573 NULL NULL --5310365297525168078 NULL NULL --6187919478609154811 NULL NULL --6187919478609154811 NULL NULL --6187919478609154811 NULL NULL 
--6187919478609154811 NULL NULL --8460550397108077433 NULL NULL -1569543799237464101 NULL NULL -3313583664488247651 NULL NULL -968819023021777205 NULL NULL -NULL -1339636982994067311 2000-06-20 -NULL -1339636982994067311 2008-12-03 -NULL -2098090254092150988 1817-03-12 -NULL -2098090254092150988 2163-05-26 -NULL -2098090254092150988 2219-12-23 -NULL -2184423060953067642 1853-07-06 -NULL -2184423060953067642 1880-10-06 -NULL -2575185053386712613 1809-07-12 -NULL -2575185053386712613 2105-01-21 -NULL -2688622006344936758 1948-10-15 -NULL -2688622006344936758 2129-01-11 -NULL -327698348664467755 2222-10-15 -NULL -3655445881497026796 2108-08-16 -NULL -4224290881682877258 1813-05-17 -NULL -4224290881682877258 2120-01-16 -NULL -4224290881682877258 2185-07-08 -NULL -4961171400048338491 2196-08-10 -NULL -5706981533666803767 1800-09-20 -NULL -5706981533666803767 2151-06-09 -NULL -5754527700632192146 1958-07-15 -NULL -614848861623872247 2101-05-25 -NULL -614848861623872247 2112-11-09 -NULL -6784441713807772877 1845-02-16 -NULL -6784441713807772877 2054-06-17 -NULL -7707546703881534780 2134-08-20 -NULL 214451696109242839 1855-05-12 -NULL 214451696109242839 1977-01-04 -NULL 214451696109242839 2179-04-18 -NULL 2438535236662373438 1881-09-16 -NULL 2438535236662373438 1916-01-10 -NULL 2438535236662373438 2026-06-23 -NULL 3845554233155411208 1805-11-10 -NULL 3845554233155411208 2264-04-05 -NULL 3873405809071478736 1918-11-20 -NULL 3873405809071478736 2034-06-09 -NULL 3873405809071478736 2164-04-23 -NULL 3905351789241845882 1866-07-28 -NULL 3905351789241845882 2045-12-05 -NULL 434940853096155515 2275-02-08 -NULL 4436884039838843341 2031-05-23 -NULL 5246983111579595707 1817-07-01 -NULL 5246983111579595707 2260-05-11 -NULL 5252407779338300447 2039-03-10 -NULL 5252407779338300447 2042-04-26 -NULL 6049335087268933751 2086-12-17 -NULL 6049335087268933751 2282-06-09 -NULL 7297177530102477725 1921-05-11 -NULL 7297177530102477725 1926-04-12 -NULL 7297177530102477725 2125-08-26 -NULL 7937120928560087303 2083-03-14 -NULL 8755921538765428593 1827-05-01 -NULL NULL 2024-01-23 -NULL NULL 2098-02-10 -NULL NULL 2242-02-08 diff --git ql/src/test/results/clientpositive/groupby_sort_1.q.out ql/src/test/results/clientpositive/groupby_sort_1.q.out deleted file mode 100644 index 5784b3e418..0000000000 --- ql/src/test/results/clientpositive/groupby_sort_1.q.out +++ /dev/null @@ -1,6590 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) --- SORT_QUERY_RESULTS - -CREATE TABLE T1(key STRING, val STRING) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) --- SORT_QUERY_RESULTS - -CREATE TABLE T1(key STRING, val STRING) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@T1 -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@t1 -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@t1 -PREHOOK: query: -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 select key, val from T1 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@t1 -POSTHOOK: query: -- perform an insert to make sure there are 2 
files -INSERT OVERWRITE TABLE T1 select key, val from T1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@t1 -POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: CREATE TABLE outputTbl1(key int, cnt int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: CREATE TABLE outputTbl1(key int, cnt int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@outputTbl1 -PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key --- matches the sorted key -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 GROUP BY key -PREHOOK: type: QUERY -POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key --- matches the sorted key -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 GROUP BY key -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl1 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [t1] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - 
serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 GROUP BY key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl1 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 GROUP BY key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: 
Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl1 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -1 1 -2 1 -3 1 -7 1 -8 2 -PREHOOK: query: CREATE TABLE outputTbl2(key1 int, key2 string, cnt int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: CREATE TABLE outputTbl2(key1 int, key2 string, cnt int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@outputTbl2 -PREHOOK: query: -- no map-side group by even if the group by key is a superset of sorted key -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl2 -SELECT key, val, count(1) FROM T1 GROUP BY key, val -PREHOOK: type: QUERY -POSTHOOK: query: -- no map-side group by even if the group by key is a superset of sorted key -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl2 -SELECT key, val, count(1) FROM T1 GROUP BY key, val -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl2 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - val - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - TOK_TABLE_OR_COL - val - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string), val (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [t1] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:string:int -#### A masked pattern was here #### - name default.outputtbl2 - serialization.ddl struct outputtbl2 { i32 key1, string key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl2 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:string:int -#### A masked pattern was here #### - name default.outputtbl2 - serialization.ddl struct outputtbl2 { i32 key1, string key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl2 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 -SELECT key, val, count(1) FROM T1 GROUP BY key, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl2 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl2 -SELECT key, val, count(1) FROM T1 GROUP BY key, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl2 -POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, 
type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl2 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl2 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl2 -#### A masked pattern was here #### -1 11 1 -2 12 1 -3 13 1 -7 17 1 -8 18 1 -8 28 1 -PREHOOK: query: -- It should work for sub-queries -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key -PREHOOK: type: QUERY -POSTHOOK: query: -- It should work for sub-queries -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - val - subq1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl1 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - 
COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [subq1:t1] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns 
key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: 
default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl1 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl1 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -1 1 -2 1 -3 1 -7 1 -8 2 -PREHOOK: query: -- It should work for sub-queries with column aliases -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k -PREHOOK: type: QUERY -POSTHOOK: query: -- It should work for sub-queries with column aliases -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - k - TOK_SELEXPR - TOK_TABLE_OR_COL - val - v - subq1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl1 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - k - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - k - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - 
columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [subq1:t1] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - 
name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - 
totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl1 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl1 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -1 1 -2 1 -3 1 -7 1 -8 2 -PREHOOK: query: CREATE TABLE outputTbl3(key1 int, key2 int, cnt int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: CREATE TABLE outputTbl3(key1 int, key2 int, cnt int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@outputTbl3 -PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant followed --- by a match to the sorted key -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl3 -SELECT 1, key, count(1) FROM T1 GROUP BY 1, key -PREHOOK: type: QUERY -POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant followed --- by a match to the sorted key -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl3 -SELECT 1, key, count(1) FROM T1 GROUP BY 1, key -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl3 - TOK_SELECT - TOK_SELEXPR - 1 - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - 1 - TOK_TABLE_OR_COL - key - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key 
(type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: 1 (type: int), key (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked pattern was here #### - name default.outputtbl3 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [t1] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked 
pattern was here #### - name default.outputtbl3 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked pattern was here #### - name default.outputtbl3 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked pattern was here #### - name default.outputtbl3 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked pattern was here #### - name default.outputtbl3 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 - name: default.outputtbl3 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked pattern was here #### - name default.outputtbl3 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path 
-> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked pattern was here #### - name default.outputtbl3 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked pattern was here #### - name default.outputtbl3 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 - name: default.outputtbl3 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 -SELECT 1, key, count(1) FROM T1 GROUP BY 1, key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl3 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl3 -SELECT 1, key, count(1) FROM T1 GROUP BY 1, key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl3 -POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] -POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl3 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl3 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl3 -#### A masked pattern was here #### -1 1 1 -1 2 1 -1 3 1 -1 7 1 -1 8 2 -PREHOOK: query: CREATE TABLE outputTbl4(key1 int, key2 int, key3 string, cnt int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: CREATE TABLE outputTbl4(key1 int, key2 int, key3 string, cnt int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@outputTbl4 -PREHOOK: query: -- no map-side group by if the group by key contains a constant followed by another column -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -PREHOOK: type: QUERY -POSTHOOK: query: -- no map-side group by if the group by key contains a constant followed by another column -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl4 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - 1 - TOK_SELEXPR - TOK_TABLE_OR_COL - val - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key 
- 1 - TOK_TABLE_OR_COL - val - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string), 1 (type: int), val (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col3 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [t1] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern 
was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl4 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl4 -POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] -POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl4 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl4 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl4 -#### A masked pattern was here #### -1 1 11 1 -2 1 12 1 -3 1 13 1 -7 1 17 1 -8 1 18 1 -8 1 28 1 -PREHOOK: query: -- no map-side group by if the group by key contains a function -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl3 -SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 -PREHOOK: type: QUERY -POSTHOOK: query: -- no map-side group by if the group by key contains a function -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl3 -SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl3 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - + - TOK_TABLE_OR_COL - key - 1 - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - + - TOK_TABLE_OR_COL - key - 1 - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - 
alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string), (key + 1) (type: double) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: double) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [t1] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked pattern was here #### - name default.outputtbl3 - 
numFiles 1 - numRows 5 - rawDataSize 25 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked pattern was here #### - name default.outputtbl3 - numFiles 1 - numRows 5 - rawDataSize 25 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 -SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl3 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl3 -SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl3 -POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl3 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl3 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl3 -#### A masked pattern was here #### -1 2 1 -2 3 1 -3 4 1 -7 8 1 -8 9 2 -PREHOOK: query: -- it should not matter what follows the group by --- test various cases - --- group by followed by another group by -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key + key, sum(cnt) from -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 -group by key + key -PREHOOK: type: QUERY -POSTHOOK: query: -- it should not matter what follows the group by --- test various cases - --- group by followed by another group by -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key + key, sum(cnt) from -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 -group by key + key -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - cnt - TOK_GROUPBY - TOK_TABLE_OR_COL - key - subq1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl1 - TOK_SELECT - TOK_SELEXPR - + - TOK_TABLE_OR_COL - key - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - sum - TOK_TABLE_OR_COL - cnt - TOK_GROUPBY - + - TOK_TABLE_OR_COL - key - TOK_TABLE_OR_COL - key - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 
depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - keys: (_col0 + _col0) (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [subq1:t1] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key + key, sum(cnt) from -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 -group by key + key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl1 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key + key, sum(cnt) from -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 -group by key + key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl1 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -14 1 -16 2 -2 1 -4 1 -6 1 -PREHOOK: query: -- group by followed by a union -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key - UNION ALL -SELECT key, count(1) FROM T1 GROUP BY key -) subq1 -PREHOOK: type: QUERY -POSTHOOK: query: -- group by followed by a union -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key - UNION ALL -SELECT key, count(1) FROM T1 GROUP BY key -) subq1 -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_UNION - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - 
TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - subq1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl1 - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - 
GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [null-subquery1:subq1-subquery1:t1, null-subquery2:subq1-subquery2:t1] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: 
Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key - UNION ALL -SELECT key, count(1) FROM T1 GROUP BY key -) subq1 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl1 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key - UNION ALL -SELECT key, count(1) FROM T1 GROUP BY key -) subq1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl1 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -1 1 -1 1 -2 1 -2 1 -3 1 -3 1 -7 1 -7 1 -8 2 -8 2 -PREHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key - UNION ALL -SELECT key + key as key, count(1) FROM T1 GROUP BY key + key -) subq1 -PREHOOK: type: QUERY -POSTHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key - UNION ALL -SELECT key + key as key, count(1) FROM T1 GROUP BY key + key -) subq1 -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_UNION - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - TOK_QUERY - TOK_FROM - TOK_TABREF - 
TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - + - TOK_TABLE_OR_COL - key - TOK_TABLE_OR_COL - key - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - + - TOK_TABLE_OR_COL - key - TOK_TABLE_OR_COL - key - subq1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl1 - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - -STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-2 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 - -STAGE PLANS: - Stage: Stage-9 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: (key + key) (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [null-subquery2:subq1-subquery2:t1] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - TableScan - GatherStats: false - Union - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - 
numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [null-subquery1:subq1-subquery1:t1] -#### A masked pattern was here #### - - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT * FROM ( -SELECT key, count(1) as cnt FROM T1 GROUP BY key - UNION ALL -SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key -) subq1 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl1 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT * FROM ( -SELECT key, count(1) as cnt FROM T1 GROUP BY key - UNION ALL -SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key -) subq1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl1 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -1 1 -14 1 -16 2 -2 1 -2 1 -3 1 -4 1 -6 1 -7 1 -8 2 -PREHOOK: query: -- group by followed by a join -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT subq1.key, subq1.cnt+subq2.cnt FROM -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 -JOIN -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 -ON subq1.key = subq2.key -PREHOOK: type: QUERY -POSTHOOK: query: -- group by followed by a join -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT subq1.key, subq1.cnt+subq2.cnt FROM -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 -JOIN -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 -ON subq1.key = subq2.key -POSTHOOK: 
type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_JOIN - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - cnt - TOK_GROUPBY - TOK_TABLE_OR_COL - key - subq1 - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - cnt - TOK_GROUPBY - TOK_TABLE_OR_COL - key - subq2 - = - . - TOK_TABLE_OR_COL - subq1 - key - . - TOK_TABLE_OR_COL - subq2 - key - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl1 - TOK_SELECT - TOK_SELEXPR - . - TOK_TABLE_OR_COL - subq1 - key - TOK_SELEXPR - + - . - TOK_TABLE_OR_COL - subq1 - cnt - . - TOK_TABLE_OR_COL - subq2 - cnt - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: bigint) - auto parallelism: false - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [subq1:t1, subq2:t1] - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger((_col1 + _col3)) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 32 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 42 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 32 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 42 -#### 
A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT subq1.key, subq1.cnt+subq2.cnt FROM -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 -JOIN -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 -ON subq1.key = subq2.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl1 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT subq1.key, subq1.cnt+subq2.cnt FROM -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 -JOIN -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 -ON subq1.key = subq2.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl1 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -1 2 -2 2 -3 2 -7 2 -8 4 -PREHOOK: query: -- group by followed by a join where one of the sub-queries can be performed in the mapper -EXPLAIN EXTENDED -SELECT * FROM -(SELECT key, count(1) FROM T1 GROUP BY key) subq1 -JOIN -(SELECT key, val, count(1) FROM T1 GROUP BY key, val) subq2 -ON subq1.key = subq2.key -PREHOOK: type: QUERY -POSTHOOK: query: -- group by followed by a join where one of the sub-queries can be performed in the mapper -EXPLAIN EXTENDED -SELECT * FROM -(SELECT key, count(1) FROM T1 GROUP BY key) subq1 -JOIN -(SELECT key, val, count(1) FROM T1 GROUP BY key, val) subq2 -ON subq1.key = subq2.key -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_JOIN - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - subq1 - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - val - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - TOK_TABLE_OR_COL - val - subq2 - = - . - TOK_TABLE_OR_COL - subq1 - key - . 
- TOK_TABLE_OR_COL - subq2 - key - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string), val (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [subq2:t1] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was 
here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: bigint) - auto parallelism: false - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string), _col2 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [subq1:t1] -#### A masked pattern was here #### - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4 - columns.types string:bigint:string:string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) -CLUSTERED BY (key, val) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) -CLUSTERED BY (key, val) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@T2 -PREHOOK: query: -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T2 select key, val from T1 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@t2 -POSTHOOK: query: -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T2 select key, val from T1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@t2 -POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: -- no mapside sort group by if the group by is a prefix of the sorted 
key -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T2 GROUP BY key -PREHOOK: type: QUERY -POSTHOOK: query: -- no mapside sort group by if the group by is a prefix of the sorted key -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T2 GROUP BY key -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T2 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl1 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - bucketGroup: true - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t2 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t2 - name: default.t2 - Truncated Path -> Alias: - /t2 [t2] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T2 GROUP BY key -PREHOOK: type: QUERY -PREHOOK: Input: default@t2 -PREHOOK: Output: default@outputtbl1 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T2 GROUP BY key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t2 -POSTHOOK: Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl1 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -1 1 -2 1 -3 1 -7 1 -8 2 -PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the --- sorted keys -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val -PREHOOK: type: QUERY -POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the --- sorted keys -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T2 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl4 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - 1 - TOK_SELEXPR - 
TOK_TABLE_OR_COL - val - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - 1 - TOK_TABLE_OR_COL - val - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string), 1 (type: int), val (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t2 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - 
serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t2 - name: default.t2 - Truncated Path -> Alias: - /t2 [t2] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 
48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - name: default.outputtbl4 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - name: default.outputtbl4 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t2 -PREHOOK: Output: default@outputtbl4 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t2 -POSTHOOK: Output: default@outputtbl4 -POSTHOOK: Lineage: outputtbl4.cnt 
EXPRESSION [(t2)t2.null, ] -POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] -POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl4 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl4 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl4 -#### A masked pattern was here #### -1 1 11 1 -2 1 12 1 -3 1 13 1 -7 1 17 1 -8 1 18 1 -8 1 28 1 -PREHOOK: query: CREATE TABLE outputTbl5(key1 int, key2 int, key3 string, key4 int, cnt int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: CREATE TABLE outputTbl5(key1 int, key2 int, key3 string, key4 int, cnt int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@outputTbl5 -PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the --- sorted keys followed by anything -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl5 -SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 -PREHOOK: type: QUERY -POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the --- sorted keys followed by anything -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl5 -SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T2 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl5 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - 1 - TOK_SELEXPR - TOK_TABLE_OR_COL - val - TOK_SELEXPR - 2 - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - 1 - TOK_TABLE_OR_COL - val - 2 - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string), 1 (type: int), val (type: string), 2 (type: int) - mode: final - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), UDFToInteger(_col4) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,key3,key4,cnt - columns.comments - columns.types int:int:string:int:int -#### A masked pattern was here #### - name default.outputtbl5 - serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl5 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t2 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t2 - name: default.t2 - Truncated Path -> Alias: - /t2 [t2] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,key3,key4,cnt - columns.comments - columns.types int:int:string:int:int -#### A masked pattern was here #### - name default.outputtbl5 - serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl5 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns 
key1,key2,key3,key4,cnt - columns.comments - columns.types int:int:string:int:int -#### A masked pattern was here #### - name default.outputtbl5 - serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl5 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,key3,key4,cnt - columns.comments - columns.types int:int:string:int:int -#### A masked pattern was here #### - name default.outputtbl5 - serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,key3,key4,cnt - columns.comments - columns.types int:int:string:int:int -#### A masked pattern was here #### - name default.outputtbl5 - serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl5 - name: default.outputtbl5 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,key3,key4,cnt - columns.comments - columns.types int:int:string:int:int -#### A masked pattern was here #### - name default.outputtbl5 - serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl5 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,key3,key4,cnt - columns.comments - columns.types int:int:string:int:int -#### A masked pattern was here #### - name default.outputtbl5 - serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,key3,key4,cnt - columns.comments - columns.types int:int:string:int:int -#### A masked pattern was here #### - name default.outputtbl5 - serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl5 - name: default.outputtbl5 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl5 -SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 -PREHOOK: type: QUERY -PREHOOK: Input: default@t2 -PREHOOK: Output: default@outputtbl5 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl5 -SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t2 -POSTHOOK: Output: default@outputtbl5 -POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] -POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] -POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] -PREHOOK: query: SELECT * FROM outputTbl5 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl5 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl5 -#### A masked pattern was here #### -1 1 11 2 1 -2 1 12 2 1 -3 1 13 2 1 -7 1 17 2 1 -8 1 18 2 1 -8 1 28 2 1 -PREHOOK: query: -- contants from sub-queries should work fine -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, constant, val, count(1) from -(SELECT key, 1 as constant, val from T2)subq -group by key, constant, val -PREHOOK: type: QUERY -POSTHOOK: query: -- contants from sub-queries should work fine -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, constant, val, count(1) from -(SELECT key, 1 as constant, val from T2)subq -group by key, constant, val -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T2 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - 1 - constant - TOK_SELEXPR - TOK_TABLE_OR_COL - val - subq - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl4 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - constant - TOK_SELEXPR - TOK_TABLE_OR_COL - val - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - TOK_TABLE_OR_COL - constant - TOK_TABLE_OR_COL - val - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: 
- Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), 1 (type: int), val (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: int), _col2 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t2 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t2 - name: default.t2 - Truncated Path -> Alias: - /t2 [subq:t2] - - Stage: Stage-7 - Conditional Operator - - Stage: 
Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - name: default.outputtbl4 - Truncated Path -> Alias: -#### A masked 
pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - name: default.outputtbl4 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, constant, val, count(1) from -(SELECT key, 1 as constant, val from T2)subq -group by key, constant, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t2 -PREHOOK: Output: default@outputtbl4 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, constant, val, count(1) from -(SELECT key, 1 as constant, val from T2)subq -group by key, constant, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t2 -POSTHOOK: Output: default@outputtbl4 -POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] -POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] -POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), 
] -PREHOOK: query: SELECT * FROM outputTbl4 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl4 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl4 -#### A masked pattern was here #### -1 1 11 1 -2 1 12 1 -3 1 13 1 -7 1 17 1 -8 1 18 1 -8 1 28 1 -PREHOOK: query: -- multiple levels of contants from sub-queries should work fine -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -select key, constant3, val, count(1) from -( -SELECT key, constant as constant2, val, 2 as constant3 from -(SELECT key, 1 as constant, val from T2)subq -)subq2 -group by key, constant3, val -PREHOOK: type: QUERY -POSTHOOK: query: -- multiple levels of contants from sub-queries should work fine -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -select key, constant3, val, count(1) from -( -SELECT key, constant as constant2, val, 2 as constant3 from -(SELECT key, 1 as constant, val from T2)subq -)subq2 -group by key, constant3, val -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T2 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - 1 - constant - TOK_SELEXPR - TOK_TABLE_OR_COL - val - subq - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - constant - constant2 - TOK_SELEXPR - TOK_TABLE_OR_COL - val - TOK_SELEXPR - 2 - constant3 - subq2 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl4 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - constant3 - TOK_SELEXPR - TOK_TABLE_OR_COL - val - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - TOK_TABLE_OR_COL - constant3 - TOK_TABLE_OR_COL - val - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), 2 (type: int), val (type: string) - outputColumnNames: _col0, _col3, _col2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col3 (type: int), _col2 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 
- properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t2 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t2 - name: default.t2 - Truncated Path -> Alias: - /t2 [subq2:subq:t2] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - name: default.outputtbl4 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - name: default.outputtbl4 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -select key, constant3, val, count(1) from -( -SELECT key, constant as constant2, val, 2 as constant3 from -(SELECT key, 1 as constant, val from T2)subq -)subq2 -group by key, constant3, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t2 -PREHOOK: Output: default@outputtbl4 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -select key, constant3, val, count(1) from -( -SELECT key, constant as constant2, val, 2 as constant3 from -(SELECT key, 1 as constant, val from T2)subq -)subq2 -group by key, constant3, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t2 -POSTHOOK: Output: default@outputtbl4 -POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] -POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] -POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl4 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl4 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl4 -#### A masked pattern was here #### -1 2 11 1 -2 2 12 1 -3 2 13 1 -7 2 17 1 -8 2 18 1 -8 2 28 1 -PREHOOK: query: CREATE TABLE DEST1(key INT, cnt INT) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: CREATE TABLE DEST1(key INT, cnt INT) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@DEST1 -PREHOOK: query: CREATE TABLE DEST2(key INT, val STRING, cnt INT) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: CREATE TABLE DEST2(key INT, val STRING, cnt INT) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@DEST2 -PREHOOK: query: EXPLAIN -FROM T2 -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val 
-PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -FROM T2 -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - bucketGroup: true - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string), val (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-3 - Stats-Aggr Operator - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-4 - Stats-Aggr Operator - -PREHOOK: query: FROM T2 -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t2 -PREHOOK: Output: default@dest1 -PREHOOK: Output: default@dest2 -POSTHOOK: query: FROM T2 -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t2 -POSTHOOK: Output: default@dest1 -POSTHOOK: Output: default@dest2 -POSTHOOK: Lineage: dest1.cnt EXPRESSION [(t2)t2.null, ] -POSTHOOK: Lineage: dest1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: dest2.cnt EXPRESSION [(t2)t2.null, ] -POSTHOOK: Lineage: dest2.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: dest2.val SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: select * from DEST1 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1 -#### A masked pattern was here #### -POSTHOOK: query: select * from DEST1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1 -#### A masked pattern was here #### -1 1 -2 1 -3 1 -7 1 -8 2 -PREHOOK: query: select * from DEST2 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest2 -#### A masked pattern was here #### -POSTHOOK: query: select * from DEST2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest2 -#### A masked pattern was here #### -1 11 1 -2 12 1 -3 13 1 -7 17 1 -8 18 1 -8 28 1 -PREHOOK: query: -- multi-table insert with a sub-query -EXPLAIN -FROM (select key, val from T2 where key = 8) x -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val -PREHOOK: type: QUERY -POSTHOOK: query: -- multi-table insert with a sub-query -EXPLAIN -FROM (select key, val from T2 where key = 8) x -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key = 8) (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: val (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '8' (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - bucketGroup: true - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - 
value expressions: _col1 (type: bigint) - Select Operator - expressions: '8' (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-3 - Stats-Aggr Operator - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-4 - Stats-Aggr Operator - -PREHOOK: query: FROM (select key, val from T2 where key = 8) x -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t2 -PREHOOK: Output: default@dest1 -PREHOOK: Output: default@dest2 -POSTHOOK: query: FROM (select key, val from T2 where key = 8) x -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t2 -POSTHOOK: Output: default@dest1 -POSTHOOK: Output: default@dest2 -POSTHOOK: Lineage: dest1.cnt EXPRESSION [(t2)t2.null, ] -POSTHOOK: Lineage: dest1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: dest2.cnt EXPRESSION [(t2)t2.null, ] -POSTHOOK: Lineage: dest2.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: dest2.val SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), 
] -PREHOOK: query: select * from DEST1 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1 -#### A masked pattern was here #### -POSTHOOK: query: select * from DEST1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1 -#### A masked pattern was here #### -8 2 -PREHOOK: query: select * from DEST2 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest2 -#### A masked pattern was here #### -POSTHOOK: query: select * from DEST2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest2 -#### A masked pattern was here #### -8 18 1 -8 28 1 diff --git ql/src/test/results/clientpositive/groupby_sort_skew_1.q.out ql/src/test/results/clientpositive/groupby_sort_skew_1.q.out deleted file mode 100644 index e912a29716..0000000000 --- ql/src/test/results/clientpositive/groupby_sort_skew_1.q.out +++ /dev/null @@ -1,7097 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) --- SORT_QUERY_RESULTS - -CREATE TABLE T1(key STRING, val STRING) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) --- SORT_QUERY_RESULTS - -CREATE TABLE T1(key STRING, val STRING) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@T1 -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@t1 -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@t1 -PREHOOK: query: -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 select key, val from T1 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@t1 -POSTHOOK: query: -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 select key, val from T1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@t1 -POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: CREATE TABLE outputTbl1(key int, cnt int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: CREATE TABLE outputTbl1(key int, cnt int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@outputTbl1 -PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key --- matches the sorted key -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 GROUP BY key -PREHOOK: type: QUERY -POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key --- matches the sorted key -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 GROUP BY key -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl1 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends 
on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [t1] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input 
format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 GROUP BY key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl1 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 GROUP BY key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl1 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -1 1 -2 1 -3 1 -7 1 -8 2 -PREHOOK: query: CREATE TABLE outputTbl2(key1 int, key2 string, cnt int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: CREATE TABLE outputTbl2(key1 int, key2 string, cnt int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@outputTbl2 -PREHOOK: query: -- no map-side group by even if the group by key is a superset of sorted key -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl2 -SELECT key, val, count(1) FROM T1 GROUP BY key, val -PREHOOK: type: QUERY -POSTHOOK: query: -- no map-side group by even if the group by key is a superset of sorted key -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl2 -SELECT key, val, count(1) FROM T1 GROUP BY key, val -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl2 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - val - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - TOK_TABLE_OR_COL - val - - -STAGE 
DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string), val (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [t1] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: partials - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 - Map Reduce - Map 
Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10001 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:string:int -#### A masked pattern was here #### - name default.outputtbl2 - serialization.ddl struct outputtbl2 { i32 key1, string key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl2 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:string:int -#### A masked pattern was here #### - name default.outputtbl2 - serialization.ddl struct outputtbl2 { i32 key1, string key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl2 - - Stage: Stage-3 - Stats-Aggr Operator -#### 
A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 -SELECT key, val, count(1) FROM T1 GROUP BY key, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl2 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl2 -SELECT key, val, count(1) FROM T1 GROUP BY key, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl2 -POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl2 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl2 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl2 -#### A masked pattern was here #### -1 11 1 -2 12 1 -3 13 1 -7 17 1 -8 18 1 -8 28 1 -PREHOOK: query: -- It should work for sub-queries -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key -PREHOOK: type: QUERY -POSTHOOK: query: -- It should work for sub-queries -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - val - subq1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl1 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - 
serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [subq1:t1] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - 
serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl1 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl1 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -1 1 -2 1 -3 1 -7 1 -8 2 -PREHOOK: query: -- It should work for sub-queries with column aliases -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k -PREHOOK: type: QUERY -POSTHOOK: query: -- It should work for sub-queries with column aliases -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - k - TOK_SELEXPR - TOK_TABLE_OR_COL - val - v - subq1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl1 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - k - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - k - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) 
(type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [subq1:t1] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path 
-> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl1 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl1 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -1 1 -2 1 -3 1 -7 1 -8 2 -PREHOOK: query: CREATE TABLE outputTbl3(key1 int, key2 int, cnt int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: CREATE TABLE outputTbl3(key1 int, key2 int, cnt int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@outputTbl3 -PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant followed --- by a match to the sorted key -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl3 -SELECT 1, key, count(1) FROM T1 GROUP BY 1, key -PREHOOK: type: QUERY -POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant followed --- by a match to the sorted key -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl3 -SELECT 1, key, count(1) FROM T1 GROUP BY 1, key -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl3 - TOK_SELECT 
- TOK_SELEXPR - 1 - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - 1 - TOK_TABLE_OR_COL - key - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: 1 (type: int), key (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked pattern was here #### - name default.outputtbl3 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked 
pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [t1] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked pattern was here #### - name default.outputtbl3 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked pattern was here #### - name default.outputtbl3 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked pattern was here #### - name default.outputtbl3 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked pattern was here #### - name default.outputtbl3 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 - name: default.outputtbl3 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - 
table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked pattern was here #### - name default.outputtbl3 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked pattern was here #### - name default.outputtbl3 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked pattern was here #### - name default.outputtbl3 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 - name: default.outputtbl3 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 -SELECT 1, key, count(1) FROM T1 GROUP BY 1, key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl3 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl3 -SELECT 1, key, count(1) FROM T1 GROUP BY 1, key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl3 -POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] -POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl3 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl3 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl3 -#### A masked pattern was here #### -1 1 1 -1 2 1 -1 3 1 -1 7 1 -1 8 2 -PREHOOK: query: CREATE TABLE outputTbl4(key1 int, key2 int, key3 string, cnt int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: CREATE TABLE outputTbl4(key1 int, key2 int, key3 string, cnt int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@outputTbl4 -PREHOOK: query: -- no map-side group by if the group by key contains a constant followed by another column -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE 
outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -PREHOOK: type: QUERY -POSTHOOK: query: -- no map-side group by if the group by key contains a constant followed by another column -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl4 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - 1 - TOK_SELEXPR - TOK_TABLE_OR_COL - val - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - 1 - TOK_TABLE_OR_COL - val - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string), 1 (type: int), val (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col3 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [t1] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int), 
KEY._col2 (type: string) - mode: partials - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string,int,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col3 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10001 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string,int,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string,int,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### 
A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - - Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl4 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl4 -POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] -POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl4 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl4 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl4 -#### A masked pattern was here #### -1 1 11 1 -2 1 12 1 -3 1 13 1 -7 1 17 1 -8 1 18 1 -8 1 28 1 -PREHOOK: query: -- no map-side group by if the group by key contains a function -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl3 -SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 -PREHOOK: type: QUERY -POSTHOOK: query: -- no map-side group by if the group by key contains a function -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl3 -SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl3 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - + - TOK_TABLE_OR_COL - key - 1 - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - + - TOK_TABLE_OR_COL - key - 1 - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string), (key + 1) (type: double) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [t1] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: double) - mode: partials - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: double) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10001 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types 
string,double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: double) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked pattern was here #### - name default.outputtbl3 - numFiles 1 - numRows 5 - rawDataSize 25 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,cnt - columns.comments - columns.types int:int:int -#### A masked pattern was here #### - name default.outputtbl3 - numFiles 1 - numRows 5 - rawDataSize 25 - serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 - - Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 -SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl3 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl3 -SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl3 -POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION 
[(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl3 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl3 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl3 -#### A masked pattern was here #### -1 2 1 -2 3 1 -3 4 1 -7 8 1 -8 9 2 -PREHOOK: query: -- it should not matter what follows the group by --- test various cases - --- group by followed by another group by -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key + key, sum(cnt) from -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 -group by key + key -PREHOOK: type: QUERY -POSTHOOK: query: -- it should not matter what follows the group by --- test various cases - --- group by followed by another group by -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key + key, sum(cnt) from -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 -group by key + key -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - cnt - TOK_GROUPBY - TOK_TABLE_OR_COL - key - subq1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl1 - TOK_SELECT - TOK_SELEXPR - + - TOK_TABLE_OR_COL - key - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - sum - TOK_TABLE_OR_COL - cnt - TOK_GROUPBY - + - TOK_TABLE_OR_COL - key - TOK_TABLE_OR_COL - key - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - keys: (_col0 + _col0) (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - 
columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [subq1:t1] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: double) - mode: partials - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10001 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: double) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key + key, sum(cnt) from -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 -group by key + key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl1 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key + key, sum(cnt) from -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 -group by key + key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl1 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -14 1 -16 2 -2 1 -4 1 -6 1 -PREHOOK: query: -- group by followed by a union -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key - UNION ALL -SELECT key, count(1) FROM T1 GROUP BY key -) subq1 -PREHOOK: type: QUERY -POSTHOOK: query: -- group by followed by a union -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key - UNION ALL -SELECT key, count(1) FROM T1 GROUP BY key -) subq1 -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - 
TOK_FROM - TOK_SUBQUERY - TOK_UNION - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - subq1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl1 - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num 
rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [null-subquery1:subq1-subquery1:t1, null-subquery2:subq1-subquery2:t1] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - 
numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key - UNION ALL -SELECT key, count(1) FROM T1 GROUP BY key -) subq1 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl1 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key - UNION ALL -SELECT key, count(1) FROM T1 GROUP BY key -) subq1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl1 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -1 1 -1 1 -2 1 -2 1 -3 1 -3 1 -7 1 -7 1 -8 2 -8 2 -PREHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key - UNION ALL -SELECT key + key as key, count(1) FROM T1 GROUP BY key + key -) subq1 -PREHOOK: type: QUERY -POSTHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key - UNION ALL -SELECT key + key as key, count(1) 
FROM T1 GROUP BY key + key -) subq1 -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_UNION - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - + - TOK_TABLE_OR_COL - key - TOK_TABLE_OR_COL - key - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - + - TOK_TABLE_OR_COL - key - TOK_TABLE_OR_COL - key - subq1 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl1 - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - -STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-10 depends on stages: Stage-9 - Stage-2 depends on stages: Stage-10 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 - -STAGE PLANS: - Stage: Stage-9 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: (key + key) (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - 
Truncated Path -> Alias: - /t1 [null-subquery2:subq1-subquery2:t1] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: double) - mode: partials - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-10 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: double) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - TableScan - GatherStats: false - Union - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10003 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [null-subquery1:subq1-subquery1:t1] -#### A masked pattern was here #### - - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - 
serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - name: default.outputtbl1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT * FROM ( -SELECT key, count(1) as cnt FROM T1 GROUP BY key - UNION ALL -SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key -) subq1 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl1 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT * FROM ( -SELECT key, count(1) as cnt FROM T1 GROUP BY key - UNION ALL -SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key -) subq1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl1 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -1 1 -14 1 -16 2 -2 1 -2 1 -3 1 -4 1 -6 1 -7 1 -8 2 -PREHOOK: query: -- group by followed by a join -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT subq1.key, subq1.cnt+subq2.cnt FROM -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 -JOIN -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 -ON subq1.key = subq2.key -PREHOOK: type: QUERY -POSTHOOK: query: -- group by followed by a join -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT subq1.key, subq1.cnt+subq2.cnt FROM -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 -JOIN -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 -ON subq1.key = subq2.key -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_JOIN - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - cnt - TOK_GROUPBY - TOK_TABLE_OR_COL - key - subq1 - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - cnt - TOK_GROUPBY - TOK_TABLE_OR_COL - key - subq2 - = - . - TOK_TABLE_OR_COL - subq1 - key - . - TOK_TABLE_OR_COL - subq2 - key - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl1 - TOK_SELECT - TOK_SELEXPR - . - TOK_TABLE_OR_COL - subq1 - key - TOK_SELEXPR - + - . - TOK_TABLE_OR_COL - subq1 - cnt - . 
- TOK_TABLE_OR_COL - subq2 - cnt - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: bigint) - auto parallelism: false - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name 
key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [subq1:t1, subq2:t1] - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger((_col1 + _col3)) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 32 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 42 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 32 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 42 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT subq1.key, subq1.cnt+subq2.cnt FROM -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 -JOIN -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 -ON subq1.key = subq2.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl1 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT subq1.key, subq1.cnt+subq2.cnt FROM -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 -JOIN -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 -ON subq1.key = subq2.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.cnt 
EXPRESSION [(t1)t1.null, (t1)t1.null, ] -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl1 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -1 2 -2 2 -3 2 -7 2 -8 4 -PREHOOK: query: -- group by followed by a join where one of the sub-queries can be performed in the mapper -EXPLAIN EXTENDED -SELECT * FROM -(SELECT key, count(1) FROM T1 GROUP BY key) subq1 -JOIN -(SELECT key, val, count(1) FROM T1 GROUP BY key, val) subq2 -ON subq1.key = subq2.key -PREHOOK: type: QUERY -POSTHOOK: query: -- group by followed by a join where one of the sub-queries can be performed in the mapper -EXPLAIN EXTENDED -SELECT * FROM -(SELECT key, count(1) FROM T1 GROUP BY key) subq1 -JOIN -(SELECT key, val, count(1) FROM T1 GROUP BY key, val) subq2 -ON subq1.key = subq2.key -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_JOIN - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - subq1 - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - val - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - TOK_TABLE_OR_COL - val - subq2 - = - . - TOK_TABLE_OR_COL - subq1 - key - . - TOK_TABLE_OR_COL - subq2 - key - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string), val (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - 
columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [subq2:t1] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: partials - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: 
KEY._col0 (type: string), KEY._col1 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: bigint) - auto parallelism: false - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string), _col2 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10003 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [subq1:t1] -#### A masked pattern was here #### - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4 - columns.types string:bigint:string:string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) -CLUSTERED BY (key, val) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) -CLUSTERED BY (key, val) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@T2 -PREHOOK: query: -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T2 select key, val from T1 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@t2 -POSTHOOK: query: -- 
perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T2 select key, val from T1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@t2 -POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: -- no mapside sort group by if the group by is a prefix of the sorted key -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T2 GROUP BY key -PREHOOK: type: QUERY -POSTHOOK: query: -- no mapside sort group by if the group by is a prefix of the sorted key -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T2 GROUP BY key -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T2 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl1 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - bucketGroup: true - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t2 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t2 - name: default.t2 - Truncated Path -> Alias: - /t2 [t2] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: partials - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10001 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked 
pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 15 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T2 GROUP BY key -PREHOOK: type: QUERY -PREHOOK: Input: default@t2 -PREHOOK: Output: default@outputtbl1 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T2 GROUP BY key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t2 -POSTHOOK: Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl1 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl1 -#### A masked pattern was here #### -1 1 -2 1 -3 1 -7 1 -8 2 -PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the --- sorted keys -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val -PREHOOK: type: QUERY -POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the --- sorted keys -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T2 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl4 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - 1 - TOK_SELEXPR - TOK_TABLE_OR_COL - val - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - 1 - TOK_TABLE_OR_COL - val - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string), 1 (type: int), val (type: string) - mode: final - 
outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t2 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t2 - name: default.t2 - Truncated Path -> Alias: - /t2 [t2] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name 
default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - name: default.outputtbl4 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### 
A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - name: default.outputtbl4 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t2 -PREHOOK: Output: default@outputtbl4 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t2 -POSTHOOK: Output: default@outputtbl4 -POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] -POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] -POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl4 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl4 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl4 -#### A masked pattern was here #### -1 1 11 1 -2 1 12 1 -3 1 13 1 -7 1 17 1 -8 1 18 1 -8 1 28 1 -PREHOOK: query: CREATE TABLE outputTbl5(key1 int, key2 int, key3 string, key4 int, cnt int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: CREATE TABLE outputTbl5(key1 int, key2 int, key3 string, key4 int, cnt int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: 
database:default -POSTHOOK: Output: default@outputTbl5 -PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the --- sorted keys followed by anything -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl5 -SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 -PREHOOK: type: QUERY -POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the --- sorted keys followed by anything -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl5 -SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T2 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl5 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - 1 - TOK_SELEXPR - TOK_TABLE_OR_COL - val - TOK_SELEXPR - 2 - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - 1 - TOK_TABLE_OR_COL - val - 2 - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string), 1 (type: int), val (type: string), 2 (type: int) - mode: final - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), UDFToInteger(_col4) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,key3,key4,cnt - columns.comments - columns.types int:int:string:int:int -#### A masked pattern was here #### - name default.outputtbl5 - serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl5 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t2 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - 
SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t2 - name: default.t2 - Truncated Path -> Alias: - /t2 [t2] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,key3,key4,cnt - columns.comments - columns.types int:int:string:int:int -#### A masked pattern was here #### - name default.outputtbl5 - serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl5 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,key3,key4,cnt - columns.comments - columns.types int:int:string:int:int -#### A masked pattern was here #### - name default.outputtbl5 - serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl5 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,key3,key4,cnt - columns.comments - columns.types int:int:string:int:int -#### A masked pattern 
was here #### - name default.outputtbl5 - serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,key3,key4,cnt - columns.comments - columns.types int:int:string:int:int -#### A masked pattern was here #### - name default.outputtbl5 - serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl5 - name: default.outputtbl5 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,key3,key4,cnt - columns.comments - columns.types int:int:string:int:int -#### A masked pattern was here #### - name default.outputtbl5 - serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl5 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,key3,key4,cnt - columns.comments - columns.types int:int:string:int:int -#### A masked pattern was here #### - name default.outputtbl5 - serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,key2,key3,key4,cnt - columns.comments - columns.types int:int:string:int:int -#### A masked pattern was here #### - name default.outputtbl5 - serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl5 - name: default.outputtbl5 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - 
-PREHOOK: query: INSERT OVERWRITE TABLE outputTbl5 -SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 -PREHOOK: type: QUERY -PREHOOK: Input: default@t2 -PREHOOK: Output: default@outputtbl5 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl5 -SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t2 -POSTHOOK: Output: default@outputtbl5 -POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] -POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] -POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] -PREHOOK: query: SELECT * FROM outputTbl5 -ORDER BY key1, key2, key3, key4 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl5 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl5 -ORDER BY key1, key2, key3, key4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl5 -#### A masked pattern was here #### -1 1 11 2 1 -2 1 12 2 1 -3 1 13 2 1 -7 1 17 2 1 -8 1 18 2 1 -8 1 28 2 1 -PREHOOK: query: -- contants from sub-queries should work fine -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, constant, val, count(1) from -(SELECT key, 1 as constant, val from T2)subq -group by key, constant, val -PREHOOK: type: QUERY -POSTHOOK: query: -- contants from sub-queries should work fine -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, constant, val, count(1) from -(SELECT key, 1 as constant, val from T2)subq -group by key, constant, val -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T2 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - 1 - constant - TOK_SELEXPR - TOK_TABLE_OR_COL - val - subq - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl4 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - constant - TOK_SELEXPR - TOK_TABLE_OR_COL - val - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - TOK_TABLE_OR_COL - constant - TOK_TABLE_OR_COL - val - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), 1 (type: int), val (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: int), _col2 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE 
- File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t2 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t2 - name: default.t2 - Truncated Path -> Alias: - /t2 [subq:t2] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - - Stage: Stage-2 
- Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - name: default.outputtbl4 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: 
default.outputtbl4 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - name: default.outputtbl4 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, constant, val, count(1) from -(SELECT key, 1 as constant, val from T2)subq -group by key, constant, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t2 -PREHOOK: Output: default@outputtbl4 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, constant, val, count(1) from -(SELECT key, 1 as constant, val from T2)subq -group by key, constant, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t2 -POSTHOOK: Output: default@outputtbl4 -POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] -POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] -POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl4 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl4 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl4 -#### A masked pattern was here #### -1 1 11 1 -2 1 12 1 -3 1 13 1 -7 1 17 1 -8 1 18 1 -8 1 28 1 -PREHOOK: query: -- multiple levels of contants from sub-queries should work fine -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -select key, constant3, val, count(1) from -( -SELECT key, constant as constant2, val, 2 as constant3 from -(SELECT key, 1 as constant, val from T2)subq -)subq2 -group by key, constant3, val -PREHOOK: type: QUERY -POSTHOOK: query: -- multiple levels of contants from sub-queries should work fine -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -select key, constant3, val, count(1) from -( -SELECT key, constant as constant2, val, 2 as 
constant3 from -(SELECT key, 1 as constant, val from T2)subq -)subq2 -group by key, constant3, val -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - T2 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - 1 - constant - TOK_SELEXPR - TOK_TABLE_OR_COL - val - subq - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - constant - constant2 - TOK_SELEXPR - TOK_TABLE_OR_COL - val - TOK_SELEXPR - 2 - constant3 - subq2 - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - outputTbl4 - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - constant3 - TOK_SELEXPR - TOK_TABLE_OR_COL - val - TOK_SELEXPR - TOK_FUNCTION - count - 1 - TOK_GROUPBY - TOK_TABLE_OR_COL - key - TOK_TABLE_OR_COL - constant3 - TOK_TABLE_OR_COL - val - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), 2 (type: int), val (type: string) - outputColumnNames: _col0, _col3, _col2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col3 (type: int), _col2 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t2 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - 
properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t2 - numFiles 1 - numRows 6 - rawDataSize 24 - serialization.ddl struct t2 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t2 - name: default.t2 - Truncated Path -> Alias: - /t2 [subq2:subq:t2] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - name: default.outputtbl4 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10001 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key1,key2,key3,cnt - columns.comments - columns.types int:int:string:int -#### A masked pattern was here #### - name default.outputtbl4 - numFiles 1 - 
numRows 6 - rawDataSize 48 - serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 54 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl4 - name: default.outputtbl4 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -select key, constant3, val, count(1) from -( -SELECT key, constant as constant2, val, 2 as constant3 from -(SELECT key, 1 as constant, val from T2)subq -)subq2 -group by key, constant3, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t2 -PREHOOK: Output: default@outputtbl4 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -select key, constant3, val, count(1) from -( -SELECT key, constant as constant2, val, 2 as constant3 from -(SELECT key, 1 as constant, val from T2)subq -)subq2 -group by key, constant3, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t2 -POSTHOOK: Output: default@outputtbl4 -POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] -POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] -POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl4 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl4 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl4 -#### A masked pattern was here #### -1 2 11 1 -2 2 12 1 -3 2 13 1 -7 2 17 1 -8 2 18 1 -8 2 28 1 -PREHOOK: query: CREATE TABLE DEST1(key INT, cnt INT) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: CREATE TABLE DEST1(key INT, cnt INT) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@DEST1 -PREHOOK: query: CREATE TABLE DEST2(key INT, val STRING, cnt INT) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: CREATE TABLE DEST2(key INT, val STRING, cnt INT) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@DEST2 -PREHOOK: query: EXPLAIN -FROM T2 -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -FROM T2 -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - bucketGroup: true - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 
Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string), val (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: partials - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-4 - Stats-Aggr Operator - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-5 - 
Stats-Aggr Operator - -PREHOOK: query: FROM T2 -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t2 -PREHOOK: Output: default@dest1 -PREHOOK: Output: default@dest2 -POSTHOOK: query: FROM T2 -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t2 -POSTHOOK: Output: default@dest1 -POSTHOOK: Output: default@dest2 -POSTHOOK: Lineage: dest1.cnt EXPRESSION [(t2)t2.null, ] -POSTHOOK: Lineage: dest1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: dest2.cnt EXPRESSION [(t2)t2.null, ] -POSTHOOK: Lineage: dest2.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: dest2.val SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: select * from DEST1 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1 -#### A masked pattern was here #### -POSTHOOK: query: select * from DEST1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1 -#### A masked pattern was here #### -1 1 -2 1 -3 1 -7 1 -8 2 -PREHOOK: query: select * from DEST2 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest2 -#### A masked pattern was here #### -POSTHOOK: query: select * from DEST2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest2 -#### A masked pattern was here #### -1 11 1 -2 12 1 -3 13 1 -7 17 1 -8 18 1 -8 28 1 -PREHOOK: query: -- multi-table insert with a sub-query -EXPLAIN -FROM (select key, val from T2 where key = 8) x -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val -PREHOOK: type: QUERY -POSTHOOK: query: -- multi-table insert with a sub-query -EXPLAIN -FROM (select key, val from T2 where key = 8) x -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key = 8) (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: val (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '8' (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - bucketGroup: true - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Select Operator - expressions: '8' (type: string), _col1 (type: string) - 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: partials - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-4 - Stats-Aggr Operator - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-5 - Stats-Aggr Operator - -PREHOOK: query: FROM (select key, val from T2 where key = 8) x -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t2 -PREHOOK: Output: default@dest1 -PREHOOK: Output: default@dest2 -POSTHOOK: query: FROM (select 
key, val from T2 where key = 8) x -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t2 -POSTHOOK: Output: default@dest1 -POSTHOOK: Output: default@dest2 -POSTHOOK: Lineage: dest1.cnt EXPRESSION [(t2)t2.null, ] -POSTHOOK: Lineage: dest1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: dest2.cnt EXPRESSION [(t2)t2.null, ] -POSTHOOK: Lineage: dest2.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: dest2.val SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: select * from DEST1 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1 -#### A masked pattern was here #### -POSTHOOK: query: select * from DEST1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1 -#### A masked pattern was here #### -8 2 -PREHOOK: query: select * from DEST2 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest2 -#### A masked pattern was here #### -POSTHOOK: query: select * from DEST2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest2 -#### A masked pattern was here #### -8 18 1 -8 28 1 diff --git ql/src/test/results/clientpositive/input12_hadoop20.q.out ql/src/test/results/clientpositive/input12_hadoop20.q.out deleted file mode 100644 index e280c81757..0000000000 --- ql/src/test/results/clientpositive/input12_hadoop20.q.out +++ /dev/null @@ -1,822 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) - -CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) - -CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1 -PREHOOK: query: CREATE TABLE dest2(key INT, value STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: CREATE TABLE dest2(key INT, value STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest2 -PREHOOK: query: CREATE TABLE dest3(key INT) PARTITIONED BY(ds STRING, hr STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: CREATE TABLE dest3(key INT) PARTITIONED BY(ds STRING, hr STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest3 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 -INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 -INSERT OVERWRITE TABLE dest3 PARTITION(ds='2008-04-08', hr='12') SELECT src.key WHERE src.key >= 200 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 -INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 -INSERT OVERWRITE TABLE dest3 PARTITION(ds='2008-04-08', hr='12') SELECT src.key WHERE src.key >= 200 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7 - Stage-6 - Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 - Stage-4 depends on stages: Stage-0 - Stage-5 - Stage-7 - Stage-8 depends on stages: Stage-7 - Stage-15 depends on 
stages: Stage-3 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-1 depends on stages: Stage-12, Stage-11, Stage-14 - Stage-10 depends on stages: Stage-1 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 - Stage-21 depends on stages: Stage-3 , consists of Stage-18, Stage-17, Stage-19 - Stage-18 - Stage-2 depends on stages: Stage-18, Stage-17, Stage-20 - Stage-16 depends on stages: Stage-2 - Stage-17 - Stage-19 - Stage-20 depends on stages: Stage-19 - -STAGE PLANS: - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 100) (type: boolean) - Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Filter Operator - predicate: ((key >= 100) and (key < 200)) (type: boolean) - Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - Filter Operator - predicate: (key >= 200) (type: boolean) - Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(key) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-9 - Conditional Operator - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-4 - Stats-Aggr Operator - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-7 - Map 
Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-8 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-10 - Stats-Aggr Operator - - Stage: Stage-11 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-21 - Conditional Operator - - Stage: Stage-18 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-2 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 12 - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-16 - Stats-Aggr Operator - - Stage: Stage-17 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-19 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-20 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 -INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 -INSERT OVERWRITE TABLE dest3 PARTITION(ds='2008-04-08', hr='12') SELECT src.key WHERE src.key >= 200 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1 -PREHOOK: Output: default@dest2 -PREHOOK: Output: default@dest3@ds=2008-04-08/hr=12 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 -INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key 
>= 100 and src.key < 200 -INSERT OVERWRITE TABLE dest3 PARTITION(ds='2008-04-08', hr='12') SELECT src.key WHERE src.key >= 200 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1 -POSTHOOK: Output: default@dest2 -POSTHOOK: Output: default@dest3@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest3 PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: SELECT dest1.* FROM dest1 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1.* FROM dest1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1 -#### A masked pattern was here #### -86 val_86 -27 val_27 -98 val_98 -66 val_66 -37 val_37 -15 val_15 -82 val_82 -17 val_17 -0 val_0 -57 val_57 -20 val_20 -92 val_92 -47 val_47 -72 val_72 -4 val_4 -35 val_35 -54 val_54 -51 val_51 -65 val_65 -83 val_83 -12 val_12 -67 val_67 -84 val_84 -58 val_58 -8 val_8 -24 val_24 -42 val_42 -0 val_0 -96 val_96 -26 val_26 -51 val_51 -43 val_43 -95 val_95 -98 val_98 -85 val_85 -77 val_77 -0 val_0 -87 val_87 -15 val_15 -72 val_72 -90 val_90 -19 val_19 -10 val_10 -5 val_5 -58 val_58 -35 val_35 -95 val_95 -11 val_11 -34 val_34 -42 val_42 -78 val_78 -76 val_76 -41 val_41 -30 val_30 -64 val_64 -76 val_76 -74 val_74 -69 val_69 -33 val_33 -70 val_70 -5 val_5 -2 val_2 -35 val_35 -80 val_80 -44 val_44 -53 val_53 -90 val_90 -12 val_12 -5 val_5 -70 val_70 -24 val_24 -70 val_70 -83 val_83 -26 val_26 -67 val_67 -18 val_18 -9 val_9 -18 val_18 -97 val_97 -84 val_84 -28 val_28 -37 val_37 -90 val_90 -97 val_97 -PREHOOK: query: SELECT dest2.* FROM dest2 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest2 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest2.* FROM dest2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest2 -#### A masked pattern was here #### -165 val_165 -193 val_193 -150 val_150 -128 val_128 -146 val_146 -152 val_152 -145 val_145 -166 val_166 -153 val_153 -193 val_193 -174 val_174 -199 val_199 -174 val_174 -162 val_162 -167 val_167 -195 val_195 -113 val_113 -155 val_155 -128 val_128 -149 val_149 -129 val_129 -170 val_170 -157 val_157 -111 val_111 -169 val_169 -125 val_125 -192 val_192 -187 val_187 -176 val_176 -138 val_138 -103 val_103 -176 val_176 -137 val_137 -180 val_180 -181 val_181 -138 val_138 -179 val_179 -172 val_172 -129 val_129 -158 val_158 -119 val_119 -197 val_197 -100 val_100 -199 val_199 -191 val_191 -165 val_165 -120 val_120 -131 val_131 -156 val_156 -196 val_196 -197 val_197 -187 val_187 -137 val_137 -169 val_169 -179 val_179 -118 val_118 -134 val_134 -138 val_138 -118 val_118 -177 val_177 -168 val_168 -143 val_143 -160 val_160 -195 val_195 -119 val_119 -149 val_149 -138 val_138 -103 val_103 -113 val_113 -167 val_167 -116 val_116 -191 val_191 -128 val_128 -193 val_193 -104 val_104 -175 val_175 -105 val_105 -190 val_190 -114 val_114 -164 val_164 -125 val_125 -164 val_164 -187 val_187 -104 val_104 -163 val_163 -119 val_119 -199 val_199 -120 val_120 -169 val_169 -178 val_178 -136 val_136 -172 val_172 -133 val_133 -175 val_175 -189 val_189 -134 val_134 -100 
val_100 -146 val_146 -186 val_186 -167 val_167 -183 val_183 -152 val_152 -194 val_194 -126 val_126 -169 val_169 -PREHOOK: query: SELECT dest3.* FROM dest3 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest3 -PREHOOK: Input: default@dest3@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest3.* FROM dest3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest3 -POSTHOOK: Input: default@dest3@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -238 2008-04-08 12 -311 2008-04-08 12 -409 2008-04-08 12 -255 2008-04-08 12 -278 2008-04-08 12 -484 2008-04-08 12 -265 2008-04-08 12 -401 2008-04-08 12 -273 2008-04-08 12 -224 2008-04-08 12 -369 2008-04-08 12 -213 2008-04-08 12 -406 2008-04-08 12 -429 2008-04-08 12 -374 2008-04-08 12 -469 2008-04-08 12 -495 2008-04-08 12 -327 2008-04-08 12 -281 2008-04-08 12 -277 2008-04-08 12 -209 2008-04-08 12 -403 2008-04-08 12 -417 2008-04-08 12 -430 2008-04-08 12 -252 2008-04-08 12 -292 2008-04-08 12 -219 2008-04-08 12 -287 2008-04-08 12 -338 2008-04-08 12 -446 2008-04-08 12 -459 2008-04-08 12 -394 2008-04-08 12 -237 2008-04-08 12 -482 2008-04-08 12 -413 2008-04-08 12 -494 2008-04-08 12 -207 2008-04-08 12 -466 2008-04-08 12 -208 2008-04-08 12 -399 2008-04-08 12 -396 2008-04-08 12 -247 2008-04-08 12 -417 2008-04-08 12 -489 2008-04-08 12 -377 2008-04-08 12 -397 2008-04-08 12 -309 2008-04-08 12 -365 2008-04-08 12 -266 2008-04-08 12 -439 2008-04-08 12 -342 2008-04-08 12 -367 2008-04-08 12 -325 2008-04-08 12 -475 2008-04-08 12 -203 2008-04-08 12 -339 2008-04-08 12 -455 2008-04-08 12 -311 2008-04-08 12 -316 2008-04-08 12 -302 2008-04-08 12 -205 2008-04-08 12 -438 2008-04-08 12 -345 2008-04-08 12 -489 2008-04-08 12 -378 2008-04-08 12 -221 2008-04-08 12 -280 2008-04-08 12 -427 2008-04-08 12 -277 2008-04-08 12 -208 2008-04-08 12 -356 2008-04-08 12 -399 2008-04-08 12 -382 2008-04-08 12 -498 2008-04-08 12 -386 2008-04-08 12 -437 2008-04-08 12 -469 2008-04-08 12 -286 2008-04-08 12 -459 2008-04-08 12 -239 2008-04-08 12 -213 2008-04-08 12 -216 2008-04-08 12 -430 2008-04-08 12 -278 2008-04-08 12 -289 2008-04-08 12 -221 2008-04-08 12 -318 2008-04-08 12 -332 2008-04-08 12 -311 2008-04-08 12 -275 2008-04-08 12 -241 2008-04-08 12 -333 2008-04-08 12 -284 2008-04-08 12 -230 2008-04-08 12 -260 2008-04-08 12 -404 2008-04-08 12 -384 2008-04-08 12 -489 2008-04-08 12 -353 2008-04-08 12 -373 2008-04-08 12 -272 2008-04-08 12 -217 2008-04-08 12 -348 2008-04-08 12 -466 2008-04-08 12 -411 2008-04-08 12 -230 2008-04-08 12 -208 2008-04-08 12 -348 2008-04-08 12 -463 2008-04-08 12 -431 2008-04-08 12 -496 2008-04-08 12 -322 2008-04-08 12 -468 2008-04-08 12 -393 2008-04-08 12 -454 2008-04-08 12 -298 2008-04-08 12 -418 2008-04-08 12 -327 2008-04-08 12 -230 2008-04-08 12 -205 2008-04-08 12 -404 2008-04-08 12 -436 2008-04-08 12 -469 2008-04-08 12 -468 2008-04-08 12 -308 2008-04-08 12 -288 2008-04-08 12 -481 2008-04-08 12 -457 2008-04-08 12 -282 2008-04-08 12 -318 2008-04-08 12 -318 2008-04-08 12 -409 2008-04-08 12 -470 2008-04-08 12 -369 2008-04-08 12 -316 2008-04-08 12 -413 2008-04-08 12 -490 2008-04-08 12 -364 2008-04-08 12 -395 2008-04-08 12 -282 2008-04-08 12 -238 2008-04-08 12 -419 2008-04-08 12 -307 2008-04-08 12 -435 2008-04-08 12 -277 2008-04-08 12 -273 2008-04-08 12 -306 2008-04-08 12 -224 2008-04-08 12 -309 2008-04-08 12 -389 2008-04-08 12 -327 2008-04-08 12 -242 2008-04-08 12 -369 2008-04-08 12 -392 2008-04-08 12 -272 2008-04-08 12 -331 2008-04-08 12 -401 2008-04-08 12 -242 2008-04-08 12 -452 2008-04-08 12 -226 2008-04-08 12 -497 2008-04-08 12 -402 
2008-04-08 12 -396 2008-04-08 12 -317 2008-04-08 12 -395 2008-04-08 12 -336 2008-04-08 12 -229 2008-04-08 12 -233 2008-04-08 12 -472 2008-04-08 12 -322 2008-04-08 12 -498 2008-04-08 12 -321 2008-04-08 12 -430 2008-04-08 12 -489 2008-04-08 12 -458 2008-04-08 12 -223 2008-04-08 12 -492 2008-04-08 12 -449 2008-04-08 12 -218 2008-04-08 12 -228 2008-04-08 12 -453 2008-04-08 12 -209 2008-04-08 12 -468 2008-04-08 12 -342 2008-04-08 12 -230 2008-04-08 12 -368 2008-04-08 12 -296 2008-04-08 12 -216 2008-04-08 12 -367 2008-04-08 12 -344 2008-04-08 12 -274 2008-04-08 12 -219 2008-04-08 12 -239 2008-04-08 12 -485 2008-04-08 12 -223 2008-04-08 12 -256 2008-04-08 12 -263 2008-04-08 12 -487 2008-04-08 12 -480 2008-04-08 12 -401 2008-04-08 12 -288 2008-04-08 12 -244 2008-04-08 12 -438 2008-04-08 12 -467 2008-04-08 12 -432 2008-04-08 12 -202 2008-04-08 12 -316 2008-04-08 12 -229 2008-04-08 12 -469 2008-04-08 12 -463 2008-04-08 12 -280 2008-04-08 12 -283 2008-04-08 12 -331 2008-04-08 12 -235 2008-04-08 12 -321 2008-04-08 12 -335 2008-04-08 12 -466 2008-04-08 12 -366 2008-04-08 12 -403 2008-04-08 12 -483 2008-04-08 12 -257 2008-04-08 12 -406 2008-04-08 12 -409 2008-04-08 12 -406 2008-04-08 12 -401 2008-04-08 12 -258 2008-04-08 12 -203 2008-04-08 12 -262 2008-04-08 12 -348 2008-04-08 12 -424 2008-04-08 12 -396 2008-04-08 12 -201 2008-04-08 12 -217 2008-04-08 12 -431 2008-04-08 12 -454 2008-04-08 12 -478 2008-04-08 12 -298 2008-04-08 12 -431 2008-04-08 12 -424 2008-04-08 12 -382 2008-04-08 12 -397 2008-04-08 12 -480 2008-04-08 12 -291 2008-04-08 12 -351 2008-04-08 12 -255 2008-04-08 12 -438 2008-04-08 12 -414 2008-04-08 12 -200 2008-04-08 12 -491 2008-04-08 12 -237 2008-04-08 12 -439 2008-04-08 12 -360 2008-04-08 12 -248 2008-04-08 12 -479 2008-04-08 12 -305 2008-04-08 12 -417 2008-04-08 12 -444 2008-04-08 12 -429 2008-04-08 12 -443 2008-04-08 12 -323 2008-04-08 12 -325 2008-04-08 12 -277 2008-04-08 12 -230 2008-04-08 12 -478 2008-04-08 12 -468 2008-04-08 12 -310 2008-04-08 12 -317 2008-04-08 12 -333 2008-04-08 12 -493 2008-04-08 12 -460 2008-04-08 12 -207 2008-04-08 12 -249 2008-04-08 12 -265 2008-04-08 12 -480 2008-04-08 12 -353 2008-04-08 12 -214 2008-04-08 12 -462 2008-04-08 12 -233 2008-04-08 12 -406 2008-04-08 12 -454 2008-04-08 12 -375 2008-04-08 12 -401 2008-04-08 12 -421 2008-04-08 12 -407 2008-04-08 12 -384 2008-04-08 12 -256 2008-04-08 12 -384 2008-04-08 12 -379 2008-04-08 12 -462 2008-04-08 12 -492 2008-04-08 12 -298 2008-04-08 12 -341 2008-04-08 12 -498 2008-04-08 12 -458 2008-04-08 12 -362 2008-04-08 12 -285 2008-04-08 12 -348 2008-04-08 12 -273 2008-04-08 12 -281 2008-04-08 12 -344 2008-04-08 12 -469 2008-04-08 12 -315 2008-04-08 12 -448 2008-04-08 12 -348 2008-04-08 12 -307 2008-04-08 12 -414 2008-04-08 12 -477 2008-04-08 12 -222 2008-04-08 12 -403 2008-04-08 12 -400 2008-04-08 12 -200 2008-04-08 12 diff --git ql/src/test/results/clientpositive/input39_hadoop20.q.out ql/src/test/results/clientpositive/input39_hadoop20.q.out deleted file mode 100644 index d7b92e066b..0000000000 --- ql/src/test/results/clientpositive/input39_hadoop20.q.out +++ /dev/null @@ -1,163 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) - - -create table t1(key string, value string) partitioned by (ds string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) - - -create table t1(key string, value string) partitioned by (ds string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t1 
-PREHOOK: query: create table t2(key string, value string) partitioned by (ds string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: create table t2(key string, value string) partitioned by (ds string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t2 -PREHOOK: query: insert overwrite table t1 partition (ds='1') -select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@t1@ds=1 -POSTHOOK: query: insert overwrite table t1 partition (ds='1') -select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@t1@ds=1 -POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: t1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table t1 partition (ds='2') -select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@t1@ds=2 -POSTHOOK: query: insert overwrite table t1 partition (ds='2') -select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@t1@ds=2 -POSTHOOK: Lineage: t1 PARTITION(ds=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: t1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table t2 partition (ds='1') -select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@t2@ds=1 -POSTHOOK: query: insert overwrite table t2 partition (ds='1') -select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@t2@ds=1 -POSTHOOK: Lineage: t2 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: t2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain -select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1' -PREHOOK: type: QUERY -POSTHOOK: query: explain -select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((hash(rand(460476415)) & 2147483647) % 32) = 0) and key is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: t1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((hash(rand(460476415)) & 2147483647) % 32) = 0) and key is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 125 Data size: 1328 
Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1' -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t1@ds=1 -PREHOOK: Input: default@t2 -PREHOOK: Input: default@t2@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t1@ds=1 -POSTHOOK: Input: default@t2 -POSTHOOK: Input: default@t2@ds=1 -#### A masked pattern was here #### -18 -mapred.job.tracker=localhost:58 diff --git ql/src/test/results/clientpositive/join14_hadoop20.q.out ql/src/test/results/clientpositive/join14_hadoop20.q.out deleted file mode 100644 index 9f759768de..0000000000 --- ql/src/test/results/clientpositive/join14_hadoop20.q.out +++ /dev/null @@ -1,1864 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) - -CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) - -CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN -FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 -INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 -INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on 
stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 100) and key is not null) (type: boolean) - Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - TableScan - alias: src - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 100) and key is not null) (type: boolean) - Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col5 - Statistics: Num rows: 20 Data size: 2093 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col5 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 2093 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 2093 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-2 - Stats-Aggr Operator - -PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 -INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@dest1 -POSTHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 -INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@dest1 -POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c2 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select dest1.* from dest1 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1 -#### A masked pattern was here #### -POSTHOOK: query: select dest1.* from dest1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1 -#### A masked pattern was here #### -103 val_103 -103 
val_103 -103 val_103 -103 val_103 -103 val_103 -103 val_103 -103 val_103 -103 val_103 -104 val_104 -104 val_104 -104 val_104 -104 val_104 -104 val_104 -104 val_104 -104 val_104 -104 val_104 -105 val_105 -105 val_105 -111 val_111 -111 val_111 -113 val_113 -113 val_113 -113 val_113 -113 val_113 -113 val_113 -113 val_113 -113 val_113 -113 val_113 -114 val_114 -114 val_114 -116 val_116 -116 val_116 -118 val_118 -118 val_118 -118 val_118 -118 val_118 -118 val_118 -118 val_118 -118 val_118 -118 val_118 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -120 val_120 -120 val_120 -120 val_120 -120 val_120 -120 val_120 -120 val_120 -120 val_120 -120 val_120 -125 val_125 -125 val_125 -125 val_125 -125 val_125 -125 val_125 -125 val_125 -125 val_125 -125 val_125 -126 val_126 -126 val_126 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -129 val_129 -129 val_129 -129 val_129 -129 val_129 -129 val_129 -129 val_129 -129 val_129 -129 val_129 -131 val_131 -131 val_131 -133 val_133 -133 val_133 -134 val_134 -134 val_134 -134 val_134 -134 val_134 -134 val_134 -134 val_134 -134 val_134 -134 val_134 -136 val_136 -136 val_136 -137 val_137 -137 val_137 -137 val_137 -137 val_137 -137 val_137 -137 val_137 -137 val_137 -137 val_137 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -143 val_143 -143 val_143 -145 val_145 -145 val_145 -146 val_146 -146 val_146 -146 val_146 -146 val_146 -146 val_146 -146 val_146 -146 val_146 -146 val_146 -149 val_149 -149 val_149 -149 val_149 -149 val_149 -149 val_149 -149 val_149 -149 val_149 -149 val_149 -150 val_150 -150 val_150 -152 val_152 -152 val_152 -152 val_152 -152 val_152 -152 val_152 -152 val_152 -152 val_152 -152 val_152 -153 val_153 -153 val_153 -155 val_155 -155 val_155 -156 val_156 -156 val_156 -157 val_157 -157 val_157 -158 val_158 -158 val_158 -160 val_160 -160 val_160 -162 val_162 -162 val_162 -163 val_163 -163 val_163 -164 val_164 -164 val_164 -164 val_164 -164 val_164 -164 val_164 -164 val_164 -164 val_164 -164 val_164 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -166 val_166 -166 val_166 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -168 val_168 -168 val_168 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -170 val_170 -170 val_170 -172 
val_172 -172 val_172 -172 val_172 -172 val_172 -172 val_172 -172 val_172 -172 val_172 -172 val_172 -174 val_174 -174 val_174 -174 val_174 -174 val_174 -174 val_174 -174 val_174 -174 val_174 -174 val_174 -175 val_175 -175 val_175 -175 val_175 -175 val_175 -175 val_175 -175 val_175 -175 val_175 -175 val_175 -176 val_176 -176 val_176 -176 val_176 -176 val_176 -176 val_176 -176 val_176 -176 val_176 -176 val_176 -177 val_177 -177 val_177 -178 val_178 -178 val_178 -179 val_179 -179 val_179 -179 val_179 -179 val_179 -179 val_179 -179 val_179 -179 val_179 -179 val_179 -180 val_180 -180 val_180 -181 val_181 -181 val_181 -183 val_183 -183 val_183 -186 val_186 -186 val_186 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -189 val_189 -189 val_189 -190 val_190 -190 val_190 -191 val_191 -191 val_191 -191 val_191 -191 val_191 -191 val_191 -191 val_191 -191 val_191 -191 val_191 -192 val_192 -192 val_192 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -194 val_194 -194 val_194 -195 val_195 -195 val_195 -195 val_195 -195 val_195 -195 val_195 -195 val_195 -195 val_195 -195 val_195 -196 val_196 -196 val_196 -197 val_197 -197 val_197 -197 val_197 -197 val_197 -197 val_197 -197 val_197 -197 val_197 -197 val_197 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -200 val_200 -200 val_200 -200 val_200 -200 val_200 -200 val_200 -200 val_200 -200 val_200 -200 val_200 -201 val_201 -201 val_201 -202 val_202 -202 val_202 -203 val_203 -203 val_203 -203 val_203 -203 val_203 -203 val_203 -203 val_203 -203 val_203 -203 val_203 -205 val_205 -205 val_205 -205 val_205 -205 val_205 -205 val_205 -205 val_205 -205 val_205 -205 val_205 -207 val_207 -207 val_207 -207 val_207 -207 val_207 -207 val_207 -207 val_207 -207 val_207 -207 val_207 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -209 val_209 -209 val_209 -209 val_209 -209 val_209 -209 val_209 -209 val_209 -209 val_209 -209 val_209 -213 val_213 -213 val_213 -213 val_213 -213 val_213 -213 val_213 -213 val_213 -213 val_213 -213 val_213 -214 val_214 -214 val_214 -216 val_216 -216 val_216 -216 val_216 -216 val_216 -216 val_216 -216 val_216 -216 val_216 -216 val_216 -217 val_217 -217 val_217 -217 val_217 -217 val_217 -217 val_217 -217 val_217 -217 val_217 -217 val_217 -218 val_218 -218 val_218 -219 val_219 -219 val_219 -219 val_219 -219 val_219 -219 val_219 -219 val_219 -219 val_219 -219 val_219 -221 val_221 -221 val_221 -221 val_221 -221 val_221 -221 val_221 -221 val_221 -221 val_221 -221 val_221 -222 val_222 -222 val_222 -223 val_223 -223 val_223 -223 val_223 -223 val_223 -223 val_223 -223 val_223 -223 val_223 -223 val_223 -224 val_224 -224 val_224 -224 val_224 -224 val_224 -224 val_224 -224 val_224 -224 val_224 -224 val_224 -226 val_226 -226 val_226 -228 val_228 -228 val_228 -229 val_229 -229 val_229 -229 val_229 -229 val_229 -229 val_229 -229 
val_229 -229 val_229 -229 val_229 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -233 val_233 -233 val_233 -233 val_233 -233 val_233 -233 val_233 -233 val_233 -233 val_233 -233 val_233 -235 val_235 -235 val_235 -237 val_237 -237 val_237 -237 val_237 -237 val_237 -237 val_237 -237 val_237 -237 val_237 -237 val_237 -238 val_238 -238 val_238 -238 val_238 -238 val_238 -238 val_238 -238 val_238 -238 val_238 -238 val_238 -239 val_239 -239 val_239 -239 val_239 -239 val_239 -239 val_239 -239 val_239 -239 val_239 -239 val_239 -241 val_241 -241 val_241 -242 val_242 -242 val_242 -242 val_242 -242 val_242 -242 val_242 -242 val_242 -242 val_242 -242 val_242 -244 val_244 -244 val_244 -247 val_247 -247 val_247 -248 val_248 -248 val_248 -249 val_249 -249 val_249 -252 val_252 -252 val_252 -255 val_255 -255 val_255 -255 val_255 -255 val_255 -255 val_255 -255 val_255 -255 val_255 -255 val_255 -256 val_256 -256 val_256 -256 val_256 -256 val_256 -256 val_256 -256 val_256 -256 val_256 -256 val_256 -257 val_257 -257 val_257 -258 val_258 -258 val_258 -260 val_260 -260 val_260 -262 val_262 -262 val_262 -263 val_263 -263 val_263 -265 val_265 -265 val_265 -265 val_265 -265 val_265 -265 val_265 -265 val_265 -265 val_265 -265 val_265 -266 val_266 -266 val_266 -272 val_272 -272 val_272 -272 val_272 -272 val_272 -272 val_272 -272 val_272 -272 val_272 -272 val_272 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -274 val_274 -274 val_274 -275 val_275 -275 val_275 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -278 val_278 -278 val_278 -278 val_278 -278 val_278 -278 val_278 -278 val_278 -278 val_278 -278 val_278 -280 val_280 -280 val_280 -280 val_280 -280 val_280 -280 val_280 -280 val_280 -280 val_280 -280 val_280 -281 val_281 -281 val_281 -281 val_281 -281 val_281 -281 val_281 -281 val_281 -281 val_281 -281 val_281 -282 val_282 -282 val_282 -282 val_282 -282 val_282 -282 val_282 -282 val_282 -282 val_282 -282 val_282 -283 val_283 -283 val_283 -284 val_284 -284 val_284 -285 val_285 -285 val_285 -286 val_286 -286 val_286 -287 val_287 -287 val_287 -288 val_288 -288 val_288 -288 val_288 -288 val_288 -288 val_288 -288 val_288 -288 val_288 -288 val_288 -289 val_289 -289 val_289 -291 val_291 -291 val_291 -292 val_292 -292 val_292 -296 val_296 -296 val_296 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 
val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -302 val_302 -302 val_302 -305 val_305 -305 val_305 -306 val_306 -306 val_306 -307 val_307 -307 val_307 -307 val_307 -307 val_307 -307 val_307 -307 val_307 -307 val_307 -307 val_307 -308 val_308 -308 val_308 -309 val_309 -309 val_309 -309 val_309 -309 val_309 -309 val_309 -309 val_309 -309 val_309 -309 val_309 -310 val_310 -310 val_310 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -315 val_315 -315 val_315 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -317 val_317 -317 val_317 -317 val_317 -317 val_317 -317 val_317 -317 val_317 -317 val_317 -317 val_317 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -321 val_321 -321 val_321 -321 val_321 -321 val_321 -321 val_321 -321 val_321 -321 val_321 -321 val_321 -322 val_322 -322 val_322 -322 val_322 -322 val_322 -322 val_322 -322 val_322 -322 val_322 -322 val_322 -323 val_323 -323 val_323 -325 val_325 -325 val_325 -325 val_325 -325 val_325 -325 val_325 -325 val_325 -325 val_325 -325 val_325 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -331 val_331 -331 val_331 -331 val_331 -331 val_331 -331 val_331 -331 val_331 -331 val_331 -331 val_331 -332 val_332 -332 val_332 -333 val_333 -333 val_333 -333 val_333 -333 val_333 -333 val_333 -333 val_333 -333 val_333 -333 val_333 -335 val_335 -335 val_335 -336 val_336 -336 val_336 -338 val_338 -338 val_338 -339 val_339 -339 val_339 -341 val_341 -341 val_341 -342 val_342 -342 val_342 -342 val_342 -342 val_342 -342 val_342 -342 val_342 -342 val_342 -342 val_342 -344 val_344 -344 val_344 -344 val_344 -344 val_344 -344 val_344 -344 val_344 -344 val_344 -344 val_344 -345 val_345 -345 val_345 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -351 val_351 -351 val_351 -353 val_353 -353 val_353 -353 val_353 -353 val_353 -353 val_353 -353 val_353 -353 val_353 -353 val_353 -356 val_356 -356 val_356 -360 val_360 -360 val_360 -362 val_362 -362 val_362 -364 val_364 -364 val_364 -365 val_365 -365 val_365 -366 val_366 -366 val_366 -367 val_367 -367 val_367 -367 val_367 -367 val_367 -367 val_367 -367 val_367 -367 val_367 -367 val_367 -368 val_368 -368 val_368 -369 val_369 -369 val_369 -369 val_369 -369 
val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -373 val_373 -373 val_373 -374 val_374 -374 val_374 -375 val_375 -375 val_375 -377 val_377 -377 val_377 -378 val_378 -378 val_378 -379 val_379 -379 val_379 -382 val_382 -382 val_382 -382 val_382 -382 val_382 -382 val_382 -382 val_382 -382 val_382 -382 val_382 -384 val_384 -384 val_384 -384 val_384 -384 val_384 -384 val_384 -384 val_384 -384 val_384 -384 val_384 -384 val_384 -384 val_384 -384 val_384 -384 val_384 -384 val_384 -384 val_384 -384 val_384 -384 val_384 -384 val_384 -384 val_384 -386 val_386 -386 val_386 -389 val_389 -389 val_389 -392 val_392 -392 val_392 -393 val_393 -393 val_393 -394 val_394 -394 val_394 -395 val_395 -395 val_395 -395 val_395 -395 val_395 -395 val_395 -395 val_395 -395 val_395 -395 val_395 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -397 val_397 -397 val_397 -397 val_397 -397 val_397 -397 val_397 -397 val_397 -397 val_397 -397 val_397 -399 val_399 -399 val_399 -399 val_399 -399 val_399 -399 val_399 -399 val_399 -399 val_399 -399 val_399 -400 val_400 -400 val_400 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -402 val_402 -402 val_402 -403 val_403 -403 val_403 -403 val_403 -403 val_403 -403 val_403 -403 val_403 -403 val_403 -403 val_403 -403 val_403 -403 val_403 -403 val_403 -403 val_403 -403 val_403 -403 val_403 -403 val_403 -403 val_403 -403 val_403 -403 val_403 -404 val_404 -404 val_404 -404 val_404 -404 val_404 -404 val_404 -404 val_404 -404 val_404 -404 val_404 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -407 val_407 -407 val_407 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -411 val_411 -411 val_411 -413 val_413 -413 val_413 -413 val_413 -413 val_413 -413 val_413 -413 val_413 -413 val_413 -413 val_413 -414 val_414 -414 val_414 -414 val_414 -414 val_414 -414 val_414 -414 val_414 -414 val_414 -414 val_414 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -418 
val_418 -418 val_418 -419 val_419 -419 val_419 -421 val_421 -421 val_421 -424 val_424 -424 val_424 -424 val_424 -424 val_424 -424 val_424 -424 val_424 -424 val_424 -424 val_424 -427 val_427 -427 val_427 -429 val_429 -429 val_429 -429 val_429 -429 val_429 -429 val_429 -429 val_429 -429 val_429 -429 val_429 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -432 val_432 -432 val_432 -435 val_435 -435 val_435 -436 val_436 -436 val_436 -437 val_437 -437 val_437 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -439 val_439 -439 val_439 -439 val_439 -439 val_439 -439 val_439 -439 val_439 -439 val_439 -439 val_439 -443 val_443 -443 val_443 -444 val_444 -444 val_444 -446 val_446 -446 val_446 -448 val_448 -448 val_448 -449 val_449 -449 val_449 -452 val_452 -452 val_452 -453 val_453 -453 val_453 -454 val_454 -454 val_454 -454 val_454 -454 val_454 -454 val_454 -454 val_454 -454 val_454 -454 val_454 -454 val_454 -454 val_454 -454 val_454 -454 val_454 -454 val_454 -454 val_454 -454 val_454 -454 val_454 -454 val_454 -454 val_454 -455 val_455 -455 val_455 -457 val_457 -457 val_457 -458 val_458 -458 val_458 -458 val_458 -458 val_458 -458 val_458 -458 val_458 -458 val_458 -458 val_458 -459 val_459 -459 val_459 -459 val_459 -459 val_459 -459 val_459 -459 val_459 -459 val_459 -459 val_459 -460 val_460 -460 val_460 -462 val_462 -462 val_462 -462 val_462 -462 val_462 -462 val_462 -462 val_462 -462 val_462 -462 val_462 -463 val_463 -463 val_463 -463 val_463 -463 val_463 -463 val_463 -463 val_463 -463 val_463 -463 val_463 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -467 val_467 -467 val_467 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -470 val_470 -470 val_470 -472 val_472 -472 val_472 -475 val_475 -475 val_475 -477 val_477 -477 
val_477 -478 val_478 -478 val_478 -478 val_478 -478 val_478 -478 val_478 -478 val_478 -478 val_478 -478 val_478 -479 val_479 -479 val_479 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -481 val_481 -481 val_481 -482 val_482 -482 val_482 -483 val_483 -483 val_483 -484 val_484 -484 val_484 -485 val_485 -485 val_485 -487 val_487 -487 val_487 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -490 val_490 -490 val_490 -491 val_491 -491 val_491 -492 val_492 -492 val_492 -492 val_492 -492 val_492 -492 val_492 -492 val_492 -492 val_492 -492 val_492 -493 val_493 -493 val_493 -494 val_494 -494 val_494 -495 val_495 -495 val_495 -496 val_496 -496 val_496 -497 val_497 -497 val_497 -498 val_498 -498 val_498 -498 val_498 -498 val_498 -498 val_498 -498 val_498 -498 val_498 -498 val_498 -498 val_498 -498 val_498 -498 val_498 -498 val_498 -498 val_498 -498 val_498 -498 val_498 -498 val_498 -498 val_498 -498 val_498 diff --git ql/src/test/results/clientpositive/join45X.q.out ql/src/test/results/clientpositive/join45X.q.out deleted file mode 100644 index 880a2e3a3f..0000000000 --- ql/src/test/results/clientpositive/join45X.q.out +++ /dev/null @@ -1,256 +0,0 @@ -Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product -PREHOOK: query: EXPLAIN -SELECT * -FROM src1 JOIN src -ON (src1.key= 100 and src.key=100) -LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT * -FROM src1 JOIN src -ON (src1.key= 100 and src.key=100) -LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (UDFToDouble(key) = 100.0D) (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(key) = 100.0D) (type: boolean) - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - TableScan - alias: src - filterExpr: (UDFToDouble(key) = 100.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(key) = 100.0D) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3000 Data size: 57622 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product -PREHOOK: query: EXPLAIN -SELECT * -FROM src1 JOIN src -ON ((src1.key,src.key) IN ((100,100),(101,101),(102,102))) -LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT * -FROM src1 JOIN src -ON ((src1.key,src.key) IN ((100,100),(101,101),(102,102))) -LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((UDFToDouble(_col0) = 100.0D) and (UDFToDouble(_col2) = 100.0D)) or ((UDFToDouble(_col0) = 101.0D) and (UDFToDouble(_col2) = 101.0D)) or ((UDFToDouble(_col0) = 102.0D) and (UDFToDouble(_col2) = 102.0D))) (type: boolean) - Statistics: Num rows: 9375 Data size: 180600 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: 
NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product -PREHOOK: query: SELECT * -FROM src1 JOIN src -ON ((src1.key,src.key) IN ((100,100),(101,101),(102,102))) -LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * -FROM src1 JOIN src -ON ((src1.key,src.key) IN ((100,100),(101,101),(102,102))) -LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product -PREHOOK: query: EXPLAIN - SELECT * - FROM src1 JOIN src - ON ((src1.key,src.key) IN ((100,100),(101,101),(102,102))) - LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN - SELECT * - FROM src1 JOIN src - ON ((src1.key,src.key) IN ((100,100),(101,101),(102,102))) - LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((UDFToDouble(_col0) = 100.0D) and (UDFToDouble(_col2) = 100.0D)) or ((UDFToDouble(_col0) = 101.0D) and (UDFToDouble(_col2) = 101.0D)) or ((UDFToDouble(_col0) = 102.0D) and (UDFToDouble(_col2) = 102.0D))) (type: boolean) - Statistics: Num rows: 9375 Data size: 180600 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - diff --git ql/src/test/results/clientpositive/loadpart_err.q.out ql/src/test/results/clientpositive/loadpart_err.q.out deleted file mode 100644 index 7e035ff0c4..0000000000 --- ql/src/test/results/clientpositive/loadpart_err.q.out +++ /dev/null @@ -1,28 +0,0 @@ -PREHOOK: query: CREATE TABLE loadpart1(a STRING, b STRING) PARTITIONED BY (ds STRING) -PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE loadpart1(a STRING, b STRING) PARTITIONED BY (ds STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@loadpart1 -PREHOOK: query: INSERT OVERWRITE TABLE loadpart1 PARTITION (ds='2009-01-01') -SELECT TRANSFORM(src.key, src.value) USING '../../data/scripts/error_script' AS (tkey, tvalue) -FROM src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@loadpart1@ds=2009-01-01 -FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask -PREHOOK: query: DESCRIBE loadpart1 -PREHOOK: type: DESCTABLE -POSTHOOK: query: DESCRIBE loadpart1 -POSTHOOK: type: DESCTABLE -a string -b string -ds string -PREHOOK: query: SHOW PARTITIONS loadpart1 -PREHOOK: type: SHOWPARTITIONS -POSTHOOK: query: SHOW PARTITIONS loadpart1 -POSTHOOK: type: SHOWPARTITIONS -FAILED: Error in semantic analysis: line 3:23 Invalid Path '../data1/files/kv1.txt': No files matching path file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/data1/files/kv1.txt -PREHOOK: query: SHOW PARTITIONS loadpart1 -PREHOOK: type: SHOWPARTITIONS -POSTHOOK: query: SHOW PARTITIONS loadpart1 -POSTHOOK: type: SHOWPARTITIONS diff --git ql/src/test/results/clientpositive/sample_islocalmode_hook_hadoop20.q.out ql/src/test/results/clientpositive/sample_islocalmode_hook_hadoop20.q.out deleted file mode 100644 index 8eb73010e8..0000000000 --- ql/src/test/results/clientpositive/sample_islocalmode_hook_hadoop20.q.out +++ /dev/null @@ -1,98 +0,0 @@ -PREHOOK: query: USE default -PREHOOK: type: SWITCHDATABASE -POSTHOOK: query: USE default -POSTHOOK: type: SWITCHDATABASE -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) --- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 --- in an attempt to force the generation of multiple splits and multiple output files. --- However, Hadoop 0.20 is incapable of generating splits smaller than the block size --- when using CombineFileInputFormat, so only one split is generated. This has a --- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was --- fixed in MAPREDUCE-2046 which is included in 0.22. - --- create file inputs -create table sih_i_part (key int, value string) partitioned by (p string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) --- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 --- in an attempt to force the generation of multiple splits and multiple output files. --- However, Hadoop 0.20 is incapable of generating splits smaller than the block size --- when using CombineFileInputFormat, so only one split is generated. This has a --- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was --- fixed in MAPREDUCE-2046 which is included in 0.22. 
- --- create file inputs -create table sih_i_part (key int, value string) partitioned by (p string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@sih_i_part -PREHOOK: query: insert overwrite table sih_i_part partition (p='1') select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@sih_i_part@p=1 -POSTHOOK: query: insert overwrite table sih_i_part partition (p='1') select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@sih_i_part@p=1 -POSTHOOK: Lineage: sih_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: sih_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table sih_i_part partition (p='2') select key+10000, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@sih_i_part@p=2 -POSTHOOK: query: insert overwrite table sih_i_part partition (p='2') select key+10000, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@sih_i_part@p=2 -POSTHOOK: Lineage: sih_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: sih_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table sih_i_part partition (p='3') select key+20000, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@sih_i_part@p=3 -POSTHOOK: query: insert overwrite table sih_i_part partition (p='3') select key+20000, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@sih_i_part@p=3 -POSTHOOK: Lineage: sih_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: sih_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table sih_src as select key, value from sih_i_part order by key, value -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@sih_i_part -PREHOOK: Input: default@sih_i_part@p=1 -PREHOOK: Input: default@sih_i_part@p=2 -PREHOOK: Input: default@sih_i_part@p=3 -POSTHOOK: query: create table sih_src as select key, value from sih_i_part order by key, value -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@sih_i_part -POSTHOOK: Input: default@sih_i_part@p=1 -POSTHOOK: Input: default@sih_i_part@p=2 -POSTHOOK: Input: default@sih_i_part@p=3 -POSTHOOK: Output: default@sih_src -PREHOOK: query: create table sih_src2 as select key, value from sih_src order by key, value -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@sih_src -POSTHOOK: query: create table sih_src2 as select key, value from sih_src order by key, value -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@sih_src -POSTHOOK: Output: default@sih_src2 -PREHOOK: query: -- Sample split, running locally limited by num tasks -select count(1) from sih_src tablesample(1 percent) -PREHOOK: type: QUERY -PREHOOK: Input: default@sih_src -#### A masked pattern was here #### -1500 -PREHOOK: query: -- sample two tables -select count(1) from sih_src tablesample(1 percent)a join sih_src2 tablesample(1 percent)b on a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@sih_src -PREHOOK: Input: 
default@sih_src2 -#### A masked pattern was here #### -3084 -PREHOOK: query: -- sample split, running locally limited by max bytes -select count(1) from sih_src tablesample(1 percent) -PREHOOK: type: QUERY -PREHOOK: Input: default@sih_src -#### A masked pattern was here #### -1500 diff --git ql/src/test/results/clientpositive/udaf_percentile_approx_20.q.out ql/src/test/results/clientpositive/udaf_percentile_approx_20.q.out deleted file mode 100644 index 14e743efa1..0000000000 --- ql/src/test/results/clientpositive/udaf_percentile_approx_20.q.out +++ /dev/null @@ -1,491 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) - -CREATE TABLE bucket (key double, value string) CLUSTERED BY (key) SORTED BY (key DESC) INTO 4 BUCKETS STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) - -CREATE TABLE bucket (key double, value string) CLUSTERED BY (key) SORTED BY (key DESC) INTO 4 BUCKETS STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@bucket -PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket -POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket -PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket -POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket -PREHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket -POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket -PREHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket -POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket -PREHOOK: query: create table t1 (result double) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: create table t1 (result double) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t1 -PREHOOK: query: create table t2 (result double) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: create table t2 (result double) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t2 -PREHOOK: query: create table t3 (result double) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: create table t3 (result double) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t3 -PREHOOK: query: create table t4 (result double) -PREHOOK: type: CREATETABLE -PREHOOK: Output: 
database:default -POSTHOOK: query: create table t4 (result double) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t4 -PREHOOK: query: create table t5 (result double) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: create table t5 (result double) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t5 -PREHOOK: query: create table t6 (result double) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: create table t6 (result double) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t6 -PREHOOK: query: create table t7 (result array<double>) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: create table t7 (result array<double>) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t7 -PREHOOK: query: create table t8 (result array<double>) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: create table t8 (result array<double>) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t8 -PREHOOK: query: create table t9 (result array<double>) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: create table t9 (result array<double>) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t9 -PREHOOK: query: create table t10 (result array<double>) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: create table t10 (result array<double>) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t10 -PREHOOK: query: create table t11 (result array<double>) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: create table t11 (result array<double>) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t11 -PREHOOK: query: create table t12 (result array<double>) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: create table t12 (result array<double>) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t12 -PREHOOK: query: -- disable map-side aggregation -FROM bucket -insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5) -insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100) -insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000) - -insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5) -insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100) -insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000) - -insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98)) -insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100) -insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000) - -insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98)) -insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100) -insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000) -PREHOOK: type: QUERY -PREHOOK: Input: default@bucket -PREHOOK: Output: default@t1 -PREHOOK: Output: default@t10 -PREHOOK: Output: default@t11 -PREHOOK: Output: default@t12
-PREHOOK: Output: default@t2 -PREHOOK: Output: default@t3 -PREHOOK: Output: default@t4 -PREHOOK: Output: default@t5 -PREHOOK: Output: default@t6 -PREHOOK: Output: default@t7 -PREHOOK: Output: default@t8 -PREHOOK: Output: default@t9 -POSTHOOK: query: -- disable map-side aggregation -FROM bucket -insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5) -insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100) -insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000) - -insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5) -insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100) -insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000) - -insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98)) -insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100) -insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000) - -insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98)) -insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100) -insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@bucket -POSTHOOK: Output: default@t1 -POSTHOOK: Output: default@t10 -POSTHOOK: Output: default@t11 -POSTHOOK: Output: default@t12 -POSTHOOK: Output: default@t2 -POSTHOOK: Output: default@t3 -POSTHOOK: Output: default@t4 -POSTHOOK: Output: default@t5 -POSTHOOK: Output: default@t6 -POSTHOOK: Output: default@t7 -POSTHOOK: Output: default@t8 -POSTHOOK: Output: default@t9 -POSTHOOK: Lineage: t1.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t10.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t11.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t12.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t2.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t3.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t4.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t5.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t6.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t7.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t8.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t9.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -PREHOOK: query: select * from t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -#### A masked pattern was here #### -POSTHOOK: query: select * from t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -#### A masked pattern was here #### -255.5 -PREHOOK: query: select * from t2 -PREHOOK: type: QUERY -PREHOOK: Input: default@t2 -#### A masked pattern was here #### -POSTHOOK: query: select * from t2 -POSTHOOK: type: 
QUERY -POSTHOOK: Input: default@t2 -#### A masked pattern was here #### -254.08333333333334 -PREHOOK: query: select * from t3 -PREHOOK: type: QUERY -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select * from t3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -255.5 -PREHOOK: query: select * from t4 -PREHOOK: type: QUERY -PREHOOK: Input: default@t4 -#### A masked pattern was here #### -POSTHOOK: query: select * from t4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t4 -#### A masked pattern was here #### -255.5 -PREHOOK: query: select * from t5 -PREHOOK: type: QUERY -PREHOOK: Input: default@t5 -#### A masked pattern was here #### -POSTHOOK: query: select * from t5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t5 -#### A masked pattern was here #### -254.08333333333334 -PREHOOK: query: select * from t6 -PREHOOK: type: QUERY -PREHOOK: Input: default@t6 -#### A masked pattern was here #### -POSTHOOK: query: select * from t6 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t6 -#### A masked pattern was here #### -255.5 -PREHOOK: query: select * from t7 -PREHOOK: type: QUERY -PREHOOK: Input: default@t7 -#### A masked pattern was here #### -POSTHOOK: query: select * from t7 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t7 -#### A masked pattern was here #### -[26.0,255.5,479.0,491.0] -PREHOOK: query: select * from t8 -PREHOOK: type: QUERY -PREHOOK: Input: default@t8 -#### A masked pattern was here #### -POSTHOOK: query: select * from t8 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t8 -#### A masked pattern was here #### -[23.355555555555558,254.08333333333334,476.5612244897959,489.50000000000006] -PREHOOK: query: select * from t9 -PREHOOK: type: QUERY -PREHOOK: Input: default@t9 -#### A masked pattern was here #### -POSTHOOK: query: select * from t9 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t9 -#### A masked pattern was here #### -[26.0,255.5,479.0,491.0] -PREHOOK: query: select * from t10 -PREHOOK: type: QUERY -PREHOOK: Input: default@t10 -#### A masked pattern was here #### -POSTHOOK: query: select * from t10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t10 -#### A masked pattern was here #### -[26.0,255.5,479.0,491.0] -PREHOOK: query: select * from t11 -PREHOOK: type: QUERY -PREHOOK: Input: default@t11 -#### A masked pattern was here #### -POSTHOOK: query: select * from t11 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t11 -#### A masked pattern was here #### -[23.355555555555558,254.08333333333334,476.5612244897959,489.50000000000006] -PREHOOK: query: select * from t12 -PREHOOK: type: QUERY -PREHOOK: Input: default@t12 -#### A masked pattern was here #### -POSTHOOK: query: select * from t12 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t12 -#### A masked pattern was here #### -[26.0,255.5,479.0,491.0] -PREHOOK: query: -- enable map-side aggregation -FROM bucket -insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5) -insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100) -insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000) - -insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5) -insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100) -insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000) - -insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98)) -insert overwrite table t8 SELECT 
percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100) -insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000) - -insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98)) -insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100) -insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000) -PREHOOK: type: QUERY -PREHOOK: Input: default@bucket -PREHOOK: Output: default@t1 -PREHOOK: Output: default@t10 -PREHOOK: Output: default@t11 -PREHOOK: Output: default@t12 -PREHOOK: Output: default@t2 -PREHOOK: Output: default@t3 -PREHOOK: Output: default@t4 -PREHOOK: Output: default@t5 -PREHOOK: Output: default@t6 -PREHOOK: Output: default@t7 -PREHOOK: Output: default@t8 -PREHOOK: Output: default@t9 -POSTHOOK: query: -- enable map-side aggregation -FROM bucket -insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5) -insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100) -insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000) - -insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5) -insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100) -insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000) - -insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98)) -insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100) -insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000) - -insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98)) -insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100) -insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@bucket -POSTHOOK: Output: default@t1 -POSTHOOK: Output: default@t10 -POSTHOOK: Output: default@t11 -POSTHOOK: Output: default@t12 -POSTHOOK: Output: default@t2 -POSTHOOK: Output: default@t3 -POSTHOOK: Output: default@t4 -POSTHOOK: Output: default@t5 -POSTHOOK: Output: default@t6 -POSTHOOK: Output: default@t7 -POSTHOOK: Output: default@t8 -POSTHOOK: Output: default@t9 -POSTHOOK: Lineage: t1.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t10.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t11.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t12.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t2.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t3.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t4.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t5.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t6.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t7.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, 
comment:null), ] -POSTHOOK: Lineage: t8.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -POSTHOOK: Lineage: t9.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] -PREHOOK: query: select * from t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -#### A masked pattern was here #### -POSTHOOK: query: select * from t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -#### A masked pattern was here #### -255.5 -PREHOOK: query: select * from t2 -PREHOOK: type: QUERY -PREHOOK: Input: default@t2 -#### A masked pattern was here #### -POSTHOOK: query: select * from t2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t2 -#### A masked pattern was here #### -254.08333333333334 -PREHOOK: query: select * from t3 -PREHOOK: type: QUERY -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select * from t3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -255.5 -PREHOOK: query: select * from t4 -PREHOOK: type: QUERY -PREHOOK: Input: default@t4 -#### A masked pattern was here #### -POSTHOOK: query: select * from t4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t4 -#### A masked pattern was here #### -255.5 -PREHOOK: query: select * from t5 -PREHOOK: type: QUERY -PREHOOK: Input: default@t5 -#### A masked pattern was here #### -POSTHOOK: query: select * from t5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t5 -#### A masked pattern was here #### -254.08333333333334 -PREHOOK: query: select * from t6 -PREHOOK: type: QUERY -PREHOOK: Input: default@t6 -#### A masked pattern was here #### -POSTHOOK: query: select * from t6 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t6 -#### A masked pattern was here #### -255.5 -PREHOOK: query: select * from t7 -PREHOOK: type: QUERY -PREHOOK: Input: default@t7 -#### A masked pattern was here #### -POSTHOOK: query: select * from t7 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t7 -#### A masked pattern was here #### -[26.0,255.5,479.0,491.0] -PREHOOK: query: select * from t8 -PREHOOK: type: QUERY -PREHOOK: Input: default@t8 -#### A masked pattern was here #### -POSTHOOK: query: select * from t8 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t8 -#### A masked pattern was here #### -[23.355555555555558,254.08333333333334,476.5612244897959,489.50000000000006] -PREHOOK: query: select * from t9 -PREHOOK: type: QUERY -PREHOOK: Input: default@t9 -#### A masked pattern was here #### -POSTHOOK: query: select * from t9 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t9 -#### A masked pattern was here #### -[26.0,255.5,479.0,491.0] -PREHOOK: query: select * from t10 -PREHOOK: type: QUERY -PREHOOK: Input: default@t10 -#### A masked pattern was here #### -POSTHOOK: query: select * from t10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t10 -#### A masked pattern was here #### -[26.0,255.5,479.0,491.0] -PREHOOK: query: select * from t11 -PREHOOK: type: QUERY -PREHOOK: Input: default@t11 -#### A masked pattern was here #### -POSTHOOK: query: select * from t11 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t11 -#### A masked pattern was here #### -[23.355555555555558,254.08333333333334,476.5612244897959,489.50000000000006] -PREHOOK: query: select * from t12 -PREHOOK: type: QUERY -PREHOOK: Input: default@t12 -#### A masked pattern was here #### -POSTHOOK: query: select * from t12 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t12 -#### A masked pattern was here #### -[26.0,255.5,479.0,491.0] diff --git 
ql/src/test/results/clientpositive/udaf_percentile_cont_disc.q.out ql/src/test/results/clientpositive/udaf_percentile_cont_disc.q.out deleted file mode 100644 index 7400d0bd7b..0000000000 --- ql/src/test/results/clientpositive/udaf_percentile_cont_disc.q.out +++ /dev/null @@ -1,842 +0,0 @@ -PREHOOK: query: DESCRIBE FUNCTION percentile_cont -PREHOOK: type: DESCFUNCTION -POSTHOOK: query: DESCRIBE FUNCTION percentile_cont -POSTHOOK: type: DESCFUNCTION -percentile_cont(input, pc) - Returns the percentile of expr at pc (range: [0,1]). -PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_cont -PREHOOK: type: DESCFUNCTION -POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_cont -POSTHOOK: type: DESCFUNCTION -percentile_cont(input, pc) - Returns the percentile of expr at pc (range: [0,1]). -Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont -Function type:BUILTIN -PREHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_cont(CAST(substr(value, 5) AS INT), 0.0), - percentile_cont(CAST(substr(value, 5) AS INT), 0.5), - percentile_cont(CAST(substr(value, 5) AS INT), 1.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_cont(CAST(substr(value, 5) AS INT), 0.0), - percentile_cont(CAST(substr(value, 5) AS INT), 0.5), - percentile_cont(CAST(substr(value, 5) AS INT), 1.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 0.0 4.5 9.0 -1 10.0 15.0 19.0 -10 100.0 103.0 105.0 -11 111.0 117.0 119.0 -12 120.0 127.0 129.0 -13 131.0 137.0 138.0 -14 143.0 146.0 149.0 -15 150.0 154.0 158.0 -16 160.0 166.5 169.0 -17 170.0 175.0 179.0 -18 180.0 186.5 189.0 -19 190.0 194.5 199.0 -2 20.0 26.0 28.0 -20 200.0 205.0 209.0 -21 213.0 216.5 219.0 -22 221.0 224.0 229.0 -23 230.0 234.0 239.0 -24 241.0 244.0 249.0 -25 252.0 256.0 258.0 -26 260.0 264.0 266.0 -27 272.0 275.0 278.0 -28 280.0 283.5 289.0 -29 291.0 297.0 298.0 -3 30.0 35.0 37.0 -30 302.0 307.0 309.0 -31 310.0 316.0 318.0 -32 321.0 324.0 327.0 -33 331.0 333.0 339.0 -34 341.0 345.0 348.0 -35 351.0 353.0 356.0 -36 360.0 367.0 369.0 -37 373.0 376.0 379.0 -38 382.0 384.0 389.0 -39 392.0 396.0 399.0 -4 41.0 42.5 47.0 -40 400.0 403.5 409.0 -41 411.0 415.5 419.0 -42 421.0 425.5 429.0 -43 430.0 435.0 439.0 -44 443.0 446.0 449.0 -45 452.0 455.0 459.0 -46 460.0 467.5 469.0 -47 470.0 477.0 479.0 -48 480.0 484.0 489.0 -49 490.0 494.5 498.0 -5 51.0 54.0 58.0 -6 64.0 66.5 69.0 -7 70.0 73.0 78.0 -8 80.0 84.0 87.0 -9 90.0 95.0 98.0 -PREHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_cont(CAST(substr(value, 5) AS INT), 0.0), - percentile_cont(CAST(substr(value, 5) AS INT), 0.5), - percentile_cont(CAST(substr(value, 5) AS INT), 1.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_cont(CAST(substr(value, 5) AS INT), 0.0), - percentile_cont(CAST(substr(value, 5) AS INT), 0.5), - percentile_cont(CAST(substr(value, 5) AS INT), 1.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 0.0 4.5 9.0 -1 10.0 15.0 19.0 -10 100.0 103.0 105.0 -11 111.0 117.0 119.0 -12 120.0 127.0 129.0 -13 131.0 137.0 138.0 -14 143.0 146.0 149.0 -15 150.0 154.0 158.0 -16 160.0 166.5 169.0 -17 170.0 175.0 179.0 
-18 180.0 186.5 189.0 -19 190.0 194.5 199.0 -2 20.0 26.0 28.0 -20 200.0 205.0 209.0 -21 213.0 216.5 219.0 -22 221.0 224.0 229.0 -23 230.0 234.0 239.0 -24 241.0 244.0 249.0 -25 252.0 256.0 258.0 -26 260.0 264.0 266.0 -27 272.0 275.0 278.0 -28 280.0 283.5 289.0 -29 291.0 297.0 298.0 -3 30.0 35.0 37.0 -30 302.0 307.0 309.0 -31 310.0 316.0 318.0 -32 321.0 324.0 327.0 -33 331.0 333.0 339.0 -34 341.0 345.0 348.0 -35 351.0 353.0 356.0 -36 360.0 367.0 369.0 -37 373.0 376.0 379.0 -38 382.0 384.0 389.0 -39 392.0 396.0 399.0 -4 41.0 42.5 47.0 -40 400.0 403.5 409.0 -41 411.0 415.5 419.0 -42 421.0 425.5 429.0 -43 430.0 435.0 439.0 -44 443.0 446.0 449.0 -45 452.0 455.0 459.0 -46 460.0 467.5 469.0 -47 470.0 477.0 479.0 -48 480.0 484.0 489.0 -49 490.0 494.5 498.0 -5 51.0 54.0 58.0 -6 64.0 66.5 69.0 -7 70.0 73.0 78.0 -8 80.0 84.0 87.0 -9 90.0 95.0 98.0 -PREHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_cont(CAST(substr(value, 5) AS INT), 0.0), - percentile_cont(CAST(substr(value, 5) AS INT), 0.5), - percentile_cont(CAST(substr(value, 5) AS INT), 1.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_cont(CAST(substr(value, 5) AS INT), 0.0), - percentile_cont(CAST(substr(value, 5) AS INT), 0.5), - percentile_cont(CAST(substr(value, 5) AS INT), 1.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 0.0 4.5 9.0 -1 10.0 15.0 19.0 -10 100.0 103.0 105.0 -11 111.0 117.0 119.0 -12 120.0 127.0 129.0 -13 131.0 137.0 138.0 -14 143.0 146.0 149.0 -15 150.0 154.0 158.0 -16 160.0 166.5 169.0 -17 170.0 175.0 179.0 -18 180.0 186.5 189.0 -19 190.0 194.5 199.0 -2 20.0 26.0 28.0 -20 200.0 205.0 209.0 -21 213.0 216.5 219.0 -22 221.0 224.0 229.0 -23 230.0 234.0 239.0 -24 241.0 244.0 249.0 -25 252.0 256.0 258.0 -26 260.0 264.0 266.0 -27 272.0 275.0 278.0 -28 280.0 283.5 289.0 -29 291.0 297.0 298.0 -3 30.0 35.0 37.0 -30 302.0 307.0 309.0 -31 310.0 316.0 318.0 -32 321.0 324.0 327.0 -33 331.0 333.0 339.0 -34 341.0 345.0 348.0 -35 351.0 353.0 356.0 -36 360.0 367.0 369.0 -37 373.0 376.0 379.0 -38 382.0 384.0 389.0 -39 392.0 396.0 399.0 -4 41.0 42.5 47.0 -40 400.0 403.5 409.0 -41 411.0 415.5 419.0 -42 421.0 425.5 429.0 -43 430.0 435.0 439.0 -44 443.0 446.0 449.0 -45 452.0 455.0 459.0 -46 460.0 467.5 469.0 -47 470.0 477.0 479.0 -48 480.0 484.0 489.0 -49 490.0 494.5 498.0 -5 51.0 54.0 58.0 -6 64.0 66.5 69.0 -7 70.0 73.0 78.0 -8 80.0 84.0 87.0 -9 90.0 95.0 98.0 -PREHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_cont(CAST(substr(value, 5) AS INT), 0.0), - percentile_cont(CAST(substr(value, 5) AS INT), 0.5), - percentile_cont(CAST(substr(value, 5) AS INT), 1.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_cont(CAST(substr(value, 5) AS INT), 0.0), - percentile_cont(CAST(substr(value, 5) AS INT), 0.5), - percentile_cont(CAST(substr(value, 5) AS INT), 1.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 0.0 4.5 9.0 -1 10.0 15.0 19.0 -10 100.0 103.0 105.0 -11 111.0 117.0 119.0 -12 120.0 127.0 129.0 -13 131.0 137.0 138.0 -14 143.0 146.0 149.0 -15 150.0 154.0 158.0 -16 160.0 166.5 169.0 -17 170.0 175.0 179.0 -18 180.0 186.5 189.0 -19 190.0 194.5 199.0 -2 20.0 
26.0 28.0 -20 200.0 205.0 209.0 -21 213.0 216.5 219.0 -22 221.0 224.0 229.0 -23 230.0 234.0 239.0 -24 241.0 244.0 249.0 -25 252.0 256.0 258.0 -26 260.0 264.0 266.0 -27 272.0 275.0 278.0 -28 280.0 283.5 289.0 -29 291.0 297.0 298.0 -3 30.0 35.0 37.0 -30 302.0 307.0 309.0 -31 310.0 316.0 318.0 -32 321.0 324.0 327.0 -33 331.0 333.0 339.0 -34 341.0 345.0 348.0 -35 351.0 353.0 356.0 -36 360.0 367.0 369.0 -37 373.0 376.0 379.0 -38 382.0 384.0 389.0 -39 392.0 396.0 399.0 -4 41.0 42.5 47.0 -40 400.0 403.5 409.0 -41 411.0 415.5 419.0 -42 421.0 425.5 429.0 -43 430.0 435.0 439.0 -44 443.0 446.0 449.0 -45 452.0 455.0 459.0 -46 460.0 467.5 469.0 -47 470.0 477.0 479.0 -48 480.0 484.0 489.0 -49 490.0 494.5 498.0 -5 51.0 54.0 58.0 -6 64.0 66.5 69.0 -7 70.0 73.0 78.0 -8 80.0 84.0 87.0 -9 90.0 95.0 98.0 -PREHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_cont(NULL, 0.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_cont(NULL, 0.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 NULL -1 NULL -10 NULL -11 NULL -12 NULL -13 NULL -14 NULL -15 NULL -16 NULL -17 NULL -18 NULL -19 NULL -2 NULL -20 NULL -21 NULL -22 NULL -23 NULL -24 NULL -25 NULL -26 NULL -27 NULL -28 NULL -29 NULL -3 NULL -30 NULL -31 NULL -32 NULL -33 NULL -34 NULL -35 NULL -36 NULL -37 NULL -38 NULL -39 NULL -4 NULL -40 NULL -41 NULL -42 NULL -43 NULL -44 NULL -45 NULL -46 NULL -47 NULL -48 NULL -49 NULL -5 NULL -6 NULL -7 NULL -8 NULL -9 NULL -PREHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 1.0 -1 1.0 -10 NULL -11 NULL -12 NULL -13 NULL -14 NULL -15 NULL -16 NULL -17 NULL -18 NULL -19 NULL -2 1.0 -20 NULL -21 NULL -22 NULL -23 NULL -24 NULL -25 NULL -26 NULL -27 NULL -28 NULL -29 NULL -3 1.0 -30 NULL -31 NULL -32 NULL -33 NULL -34 NULL -35 NULL -36 NULL -37 NULL -38 NULL -39 NULL -4 1.0 -40 NULL -41 NULL -42 NULL -43 NULL -44 NULL -45 NULL -46 NULL -47 NULL -48 NULL -49 NULL -5 NULL -6 NULL -7 NULL -8 NULL -9 NULL -PREHOOK: query: select percentile_cont(cast(key as bigint), 0.5) from src where false -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select percentile_cont(cast(key as bigint), 0.5) from src where false -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -NULL -PREHOOK: query: DESCRIBE FUNCTION percentile_disc -PREHOOK: type: DESCFUNCTION -POSTHOOK: query: DESCRIBE FUNCTION percentile_disc -POSTHOOK: type: DESCFUNCTION -There is no documentation for function 'percentile_disc' -PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_disc -PREHOOK: type: DESCFUNCTION -POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_disc -POSTHOOK: type: DESCFUNCTION -There is no documentation for function 'percentile_disc' -Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileDisc -Function type:BUILTIN -PREHOOK: query: SELECT CAST(key AS INT) DIV 
10, - percentile_disc(CAST(substr(value, 5) AS INT), 0.0), - percentile_disc(CAST(substr(value, 5) AS INT), 0.5), - percentile_disc(CAST(substr(value, 5) AS INT), 1.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_disc(CAST(substr(value, 5) AS INT), 0.0), - percentile_disc(CAST(substr(value, 5) AS INT), 0.5), - percentile_disc(CAST(substr(value, 5) AS INT), 1.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 0.0 4.5 9.0 -1 10.0 15.0 19.0 -10 100.0 103.0 105.0 -11 111.0 117.0 119.0 -12 120.0 127.0 129.0 -13 131.0 137.0 138.0 -14 143.0 146.0 149.0 -15 150.0 154.0 158.0 -16 160.0 166.5 169.0 -17 170.0 175.0 179.0 -18 180.0 186.5 189.0 -19 190.0 194.5 199.0 -2 20.0 26.0 28.0 -20 200.0 205.0 209.0 -21 213.0 216.5 219.0 -22 221.0 224.0 229.0 -23 230.0 234.0 239.0 -24 241.0 244.0 249.0 -25 252.0 256.0 258.0 -26 260.0 264.0 266.0 -27 272.0 275.0 278.0 -28 280.0 283.5 289.0 -29 291.0 297.0 298.0 -3 30.0 35.0 37.0 -30 302.0 307.0 309.0 -31 310.0 316.0 318.0 -32 321.0 324.0 327.0 -33 331.0 333.0 339.0 -34 341.0 345.0 348.0 -35 351.0 353.0 356.0 -36 360.0 367.0 369.0 -37 373.0 376.0 379.0 -38 382.0 384.0 389.0 -39 392.0 396.0 399.0 -4 41.0 42.5 47.0 -40 400.0 403.5 409.0 -41 411.0 415.5 419.0 -42 421.0 425.5 429.0 -43 430.0 435.0 439.0 -44 443.0 446.0 449.0 -45 452.0 455.0 459.0 -46 460.0 467.5 469.0 -47 470.0 477.0 479.0 -48 480.0 484.0 489.0 -49 490.0 494.5 498.0 -5 51.0 54.0 58.0 -6 64.0 66.5 69.0 -7 70.0 73.0 78.0 -8 80.0 84.0 87.0 -9 90.0 95.0 98.0 -PREHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_disc(CAST(substr(value, 5) AS INT), 0.0), - percentile_disc(CAST(substr(value, 5) AS INT), 0.5), - percentile_disc(CAST(substr(value, 5) AS INT), 1.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_disc(CAST(substr(value, 5) AS INT), 0.0), - percentile_disc(CAST(substr(value, 5) AS INT), 0.5), - percentile_disc(CAST(substr(value, 5) AS INT), 1.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 0.0 4.5 9.0 -1 10.0 15.0 19.0 -10 100.0 103.0 105.0 -11 111.0 117.0 119.0 -12 120.0 127.0 129.0 -13 131.0 137.0 138.0 -14 143.0 146.0 149.0 -15 150.0 154.0 158.0 -16 160.0 166.5 169.0 -17 170.0 175.0 179.0 -18 180.0 186.5 189.0 -19 190.0 194.5 199.0 -2 20.0 26.0 28.0 -20 200.0 205.0 209.0 -21 213.0 216.5 219.0 -22 221.0 224.0 229.0 -23 230.0 234.0 239.0 -24 241.0 244.0 249.0 -25 252.0 256.0 258.0 -26 260.0 264.0 266.0 -27 272.0 275.0 278.0 -28 280.0 283.5 289.0 -29 291.0 297.0 298.0 -3 30.0 35.0 37.0 -30 302.0 307.0 309.0 -31 310.0 316.0 318.0 -32 321.0 324.0 327.0 -33 331.0 333.0 339.0 -34 341.0 345.0 348.0 -35 351.0 353.0 356.0 -36 360.0 367.0 369.0 -37 373.0 376.0 379.0 -38 382.0 384.0 389.0 -39 392.0 396.0 399.0 -4 41.0 42.5 47.0 -40 400.0 403.5 409.0 -41 411.0 415.5 419.0 -42 421.0 425.5 429.0 -43 430.0 435.0 439.0 -44 443.0 446.0 449.0 -45 452.0 455.0 459.0 -46 460.0 467.5 469.0 -47 470.0 477.0 479.0 -48 480.0 484.0 489.0 -49 490.0 494.5 498.0 -5 51.0 54.0 58.0 -6 64.0 66.5 69.0 -7 70.0 73.0 78.0 -8 80.0 84.0 87.0 -9 90.0 95.0 98.0 -PREHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_disc(CAST(substr(value, 5) AS INT), 
0.0), - percentile_disc(CAST(substr(value, 5) AS INT), 0.5), - percentile_disc(CAST(substr(value, 5) AS INT), 1.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_disc(CAST(substr(value, 5) AS INT), 0.0), - percentile_disc(CAST(substr(value, 5) AS INT), 0.5), - percentile_disc(CAST(substr(value, 5) AS INT), 1.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 0.0 4.5 9.0 -1 10.0 15.0 19.0 -10 100.0 103.0 105.0 -11 111.0 117.0 119.0 -12 120.0 127.0 129.0 -13 131.0 137.0 138.0 -14 143.0 146.0 149.0 -15 150.0 154.0 158.0 -16 160.0 166.5 169.0 -17 170.0 175.0 179.0 -18 180.0 186.5 189.0 -19 190.0 194.5 199.0 -2 20.0 26.0 28.0 -20 200.0 205.0 209.0 -21 213.0 216.5 219.0 -22 221.0 224.0 229.0 -23 230.0 234.0 239.0 -24 241.0 244.0 249.0 -25 252.0 256.0 258.0 -26 260.0 264.0 266.0 -27 272.0 275.0 278.0 -28 280.0 283.5 289.0 -29 291.0 297.0 298.0 -3 30.0 35.0 37.0 -30 302.0 307.0 309.0 -31 310.0 316.0 318.0 -32 321.0 324.0 327.0 -33 331.0 333.0 339.0 -34 341.0 345.0 348.0 -35 351.0 353.0 356.0 -36 360.0 367.0 369.0 -37 373.0 376.0 379.0 -38 382.0 384.0 389.0 -39 392.0 396.0 399.0 -4 41.0 42.5 47.0 -40 400.0 403.5 409.0 -41 411.0 415.5 419.0 -42 421.0 425.5 429.0 -43 430.0 435.0 439.0 -44 443.0 446.0 449.0 -45 452.0 455.0 459.0 -46 460.0 467.5 469.0 -47 470.0 477.0 479.0 -48 480.0 484.0 489.0 -49 490.0 494.5 498.0 -5 51.0 54.0 58.0 -6 64.0 66.5 69.0 -7 70.0 73.0 78.0 -8 80.0 84.0 87.0 -9 90.0 95.0 98.0 -PREHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_disc(CAST(substr(value, 5) AS INT), 0.0), - percentile_disc(CAST(substr(value, 5) AS INT), 0.5), - percentile_disc(CAST(substr(value, 5) AS INT), 1.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_disc(CAST(substr(value, 5) AS INT), 0.0), - percentile_disc(CAST(substr(value, 5) AS INT), 0.5), - percentile_disc(CAST(substr(value, 5) AS INT), 1.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 0.0 4.5 9.0 -1 10.0 15.0 19.0 -10 100.0 103.0 105.0 -11 111.0 117.0 119.0 -12 120.0 127.0 129.0 -13 131.0 137.0 138.0 -14 143.0 146.0 149.0 -15 150.0 154.0 158.0 -16 160.0 166.5 169.0 -17 170.0 175.0 179.0 -18 180.0 186.5 189.0 -19 190.0 194.5 199.0 -2 20.0 26.0 28.0 -20 200.0 205.0 209.0 -21 213.0 216.5 219.0 -22 221.0 224.0 229.0 -23 230.0 234.0 239.0 -24 241.0 244.0 249.0 -25 252.0 256.0 258.0 -26 260.0 264.0 266.0 -27 272.0 275.0 278.0 -28 280.0 283.5 289.0 -29 291.0 297.0 298.0 -3 30.0 35.0 37.0 -30 302.0 307.0 309.0 -31 310.0 316.0 318.0 -32 321.0 324.0 327.0 -33 331.0 333.0 339.0 -34 341.0 345.0 348.0 -35 351.0 353.0 356.0 -36 360.0 367.0 369.0 -37 373.0 376.0 379.0 -38 382.0 384.0 389.0 -39 392.0 396.0 399.0 -4 41.0 42.5 47.0 -40 400.0 403.5 409.0 -41 411.0 415.5 419.0 -42 421.0 425.5 429.0 -43 430.0 435.0 439.0 -44 443.0 446.0 449.0 -45 452.0 455.0 459.0 -46 460.0 467.5 469.0 -47 470.0 477.0 479.0 -48 480.0 484.0 489.0 -49 490.0 494.5 498.0 -5 51.0 54.0 58.0 -6 64.0 66.5 69.0 -7 70.0 73.0 78.0 -8 80.0 84.0 87.0 -9 90.0 95.0 98.0 -PREHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_disc(NULL, 0.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -PREHOOK: type: QUERY 
-PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_disc(NULL, 0.0) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 NULL -1 NULL -10 NULL -11 NULL -12 NULL -13 NULL -14 NULL -15 NULL -16 NULL -17 NULL -18 NULL -19 NULL -2 NULL -20 NULL -21 NULL -22 NULL -23 NULL -24 NULL -25 NULL -26 NULL -27 NULL -28 NULL -29 NULL -3 NULL -30 NULL -31 NULL -32 NULL -33 NULL -34 NULL -35 NULL -36 NULL -37 NULL -38 NULL -39 NULL -4 NULL -40 NULL -41 NULL -42 NULL -43 NULL -44 NULL -45 NULL -46 NULL -47 NULL -48 NULL -49 NULL -5 NULL -6 NULL -7 NULL -8 NULL -9 NULL -PREHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) -FROM src -GROUP BY CAST(key AS INT) DIV 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 1.0 -1 1.0 -10 NULL -11 NULL -12 NULL -13 NULL -14 NULL -15 NULL -16 NULL -17 NULL -18 NULL -19 NULL -2 1.0 -20 NULL -21 NULL -22 NULL -23 NULL -24 NULL -25 NULL -26 NULL -27 NULL -28 NULL -29 NULL -3 1.0 -30 NULL -31 NULL -32 NULL -33 NULL -34 NULL -35 NULL -36 NULL -37 NULL -38 NULL -39 NULL -4 1.0 -40 NULL -41 NULL -42 NULL -43 NULL -44 NULL -45 NULL -46 NULL -47 NULL -48 NULL -49 NULL -5 NULL -6 NULL -7 NULL -8 NULL -9 NULL -PREHOOK: query: select percentile_disc(cast(key as bigint), 0.5) from src where false -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select percentile_disc(cast(key as bigint), 0.5) from src where false -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -NULL